5 * Created by Sarah Westcott on 4/29/09.
6 * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
10 #include "consensuscommand.h"
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::setParameters(){
15 vector<string> myArray;
16 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
20 m->errorOut(e, "ConcensusCommand", "setParameters");
24 //**********************************************************************************************************************
25 string ConcensusCommand::getHelpString(){
27 string helpString = "";
28 helpString += "The consensus command can only be executed after a successful read.tree command.\n";
29 helpString += "The consensus command has no parameters.\n";
30 helpString += "The consensus command should be in the following format: consensus().\n";
31 helpString += "The consensus command output two files: .consensus.tre and .consensuspairs.\n";
32 helpString += "The .consensus.tre file contains the consensus tree of the trees in your input file.\n";
33 helpString += "The branch lengths are the percentage of trees in your input file that had the given pair.\n";
34 helpString += "The .consensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the consensus tree \n";
35 helpString += "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n";
39 m->errorOut(e, "ConcensusCommand", "getHelpString");
43 //**********************************************************************************************************************
44 ConcensusCommand::ConcensusCommand(){
46 abort = true; calledHelp = true;
48 vector<string> tempOutNames;
49 outputTypes["tree"] = tempOutNames;
50 outputTypes["nodepairs"] = tempOutNames;
53 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
57 //**********************************************************************************************************************
59 ConcensusCommand::ConcensusCommand(string fileroot) {
61 abort = false; calledHelp = false;
65 //initialize outputTypes
66 vector<string> tempOutNames;
67 outputTypes["tree"] = tempOutNames;
68 outputTypes["nodepairs"] = tempOutNames;
74 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
78 //**********************************************************************************************************************
80 int ConcensusCommand::execute(){
83 if (abort == true) { if (calledHelp) { return 0; } return 2; }
85 m->mothurOut("This command is not currently in use."); m->mothurOutEndLine();
87 t = globaldata->gTree;
88 numNodes = t[0]->getNumNodes();
89 numLeaves = t[0]->getNumLeaves();
92 //get the possible pairings
95 if (m->control_pressed) { return 0; }
97 //open file for pairing not included in the tree
98 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded); outputTypes["nodepairs"].push_back(notIncluded);
99 m->openOutputFile(notIncluded, out2);
101 consensusTree = new Tree();
103 it2 = nodePairs.find(treeSet);
105 nodePairsInTree[treeSet] = it2->second;
107 //erase treeset because you are adding it
108 nodePairs.erase(treeSet);
110 //set count to numLeaves;
113 buildConcensusTree(treeSet);
115 if (m->control_pressed) { delete consensusTree; return 0; }
117 consensusTree->assembleTree();
119 if (m->control_pressed) { delete consensusTree; return 0; }
121 //output species in order
122 out2 << "Species in Order: " << endl << endl;
123 for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
125 //output sets included
126 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
128 if (m->control_pressed) { delete consensusTree; return 0; }
131 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
133 if (m->control_pressed) { delete consensusTree; return 0; }
135 //only output pairs not leaves
136 if (it2->first.size() > 1) {
138 //initialize temp to all "."
139 temp.resize(treeSet.size(), ".");
141 //set the spot in temp that represents it2->first[i] to a "*"
142 for (int i = 0; i < it2->first.size(); i++) {
144 int index = findSpot(it2->first[i]);
146 //temp[index] = it2->first[i] + " ";
150 for (int j = 0; j < temp.size(); j++) {
153 out2 << '\t' << it2->second << endl;
157 //output sets not included
158 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
159 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
161 if (m->control_pressed) { delete consensusTree; return 0; }
164 //initialize temp to all "."
165 temp.resize(treeSet.size(), ".");
167 //set the spot in temp that represents it2->first[i] to a "*"
168 for (int i = 0; i < it2->first.size(); i++) {
170 int index = findSpot(it2->first[i]);
175 for (int j = 0; j < temp.size(); j++) {
178 out2 << '\t' << it2->second << endl;
181 outputFile = filename + ".cons.tre"; outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
182 m->openOutputFile(outputFile, out);
184 consensusTree->print(out, "boot");
186 out.close(); out2.close();
188 delete consensusTree;
190 //set first tree file as new current treefile
191 string currentTree = "";
192 itTypes = outputTypes.find("tree");
193 if (itTypes != outputTypes.end()) {
194 if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
199 catch(exception& e) {
200 m->errorOut(e, "ConcensusCommand", "execute");
205 //**********************************************************************************************************************
206 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
208 vector<string> leftChildSet;
209 vector<string> rightChildSet;
211 if (m->control_pressed) { return 1; }
213 //if you are at a leaf
214 if (nodeSet.size() == 1) {
215 //return the vector index of the leaf you are at
216 return consensusTree->getIndex(nodeSet[0]);
217 //terminate recursion
218 }else if (count == numNodes) { return 0; }
220 //finds best child pair
221 leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
222 int left = buildConcensusTree(leftChildSet);
223 int right = buildConcensusTree(rightChildSet);
224 consensusTree->tree[count].setChildren(left, right);
225 consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]);
226 consensusTree->tree[left].setParent(count);
227 consensusTree->tree[right].setParent(count);
233 catch(exception& e) {
234 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
239 //**********************************************************************************************************************
240 int ConcensusCommand::getSets() {
245 //for each tree add the possible pairs you find
246 for (int i = 0; i < t.size(); i++) {
248 //for each non-leaf node get descendant info.
249 for (int j = numLeaves; j < numNodes; j++) {
251 if (m->control_pressed) { return 1; }
254 //go through pcounts and pull out descendants
255 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
256 temp.push_back(it->first);
260 sort(temp.begin(), temp.end());
262 it2 = nodePairs.find(temp);
263 if (it2 != nodePairs.end()) {
272 //add each leaf to terminate recursion in consensus
273 //you want the leaves in there but with insignifigant sightings value so it is added last
274 //for each leaf node get descendant info.
275 for (int j = 0; j < numLeaves; j++) {
277 if (m->control_pressed) { return 1; }
279 //only need the first one since leaves have no descendants but themselves
280 it = t[0]->tree[j].pcount.begin();
281 temp.clear(); temp.push_back(it->first);
284 treeSet.push_back(it->first);
286 //add leaf to list but with sighting value less then all non leaf pairs
290 sort(treeSet.begin(), treeSet.end());
293 map< vector<string>, int> nodePairsCopy = nodePairs;
296 //set initial rating on pairs to sightings + subgroup sightings
297 while (nodePairsCopy.size() != 0) {
298 if (m->control_pressed) { return 1; }
300 vector<string> smallOne = getSmallest(nodePairsCopy);
302 int subgrouprate = getSubgroupRating(smallOne);
304 nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
306 nodePairsCopy.erase(smallOne);
311 catch(exception& e) {
312 m->errorOut(e, "ConcensusCommand", "getSets");
316 //**********************************************************************************************************************
317 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
319 vector<string> smallest = nodes.begin()->first;
320 int smallsize = smallest.size();
322 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
323 if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
328 catch(exception& e) {
329 m->errorOut(e, "ConcensusCommand", "getSmallest");
334 //**********************************************************************************************************************
335 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
337 //cout << "new call " << endl << endl << endl;
338 vector<string> largest; largest.clear();
341 //if you are just 2 groups
342 if (bigset.size() == 2) {
343 rest.push_back(bigset[0]);
344 largest.push_back(bigset[1]);
346 rest = bestSplit[bigset][0];
347 largest = bestSplit[bigset][1];
351 //save for printing out later and for branch lengths
352 nodePairsInTree[rest] = nodePairs[rest];
354 //delete whatever set you return because it is no longer available
355 nodePairs.erase(rest);
357 //save for printing out later and for branch lengths
358 nodePairsInTree[largest] = nodePairs[largest];
360 //delete whatever set you return because it is no longer available
361 nodePairs.erase(largest);
366 catch(exception& e) {
367 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
372 /**********************************************************************************************************************/
373 int ConcensusCommand::getSubgroupRating(vector<string> group) {
375 map< vector<string>, int>::iterator ittemp;
376 map< vector< vector<string> > , int >::iterator it3;
379 // ***********************************************************************************//
380 //1. this function must be called passing it littlest sets to biggest
381 // since it the rating is made from your sighting plus you best splits rating
382 //2. it saves the top pair to use later
383 // ***********************************************************************************//
386 if (group.size() < 3) { return rate; }
388 map< vector<string>, int> possiblePairing; //this is all the subsets of group
390 //go through the sets
391 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
392 //are you a subset of bigset, then save in possiblePairings
393 if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
396 map< vector< vector<string> > , int > rating;
398 while (possiblePairing.size() != 0) {
400 it2 = possiblePairing.begin();
401 vector<string> temprest = getRestSet(group, it2->first);
403 //is the rest a set available in possiblePairings
404 ittemp = possiblePairing.find(temprest);
405 if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
406 vector< vector<string> > temprate;
407 temprate.push_back(it2->first); temprate.push_back(temprest);
409 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
411 //erase so you dont add 1,2 and 2,1.
412 possiblePairing.erase(temprest);
415 possiblePairing.erase(it2);
419 it3 = rating.begin();
421 vector< vector<string> > topPair = it3->first;
423 //choose the split with the best rating
424 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
426 if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
429 bestSplit[group] = topPair;
433 catch(exception& e) {
434 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
439 //**********************************************************************************************************************
440 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
444 for (int i = 0; i < bigset.size(); i++) {
445 bool inSubset = false;
446 for (int j = 0; j < subset.size(); j++) {
447 if (bigset[i] == subset[j]) { inSubset = true; break; }
450 //its not in the subset so put it in the rest
451 if (inSubset == false) { rest.push_back(bigset[i]); }
457 catch(exception& e) {
458 m->errorOut(e, "ConcensusCommand", "getRestSet");
463 //**********************************************************************************************************************
464 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
468 if (subset.size() > bigset.size()) { return false; }
470 //check if each guy in suset is also in bigset
471 for (int i = 0; i < subset.size(); i++) {
473 for (int j = 0; j < bigset.size(); j++) {
474 if (subset[i] == bigset[j]) { match = true; break; }
477 //you have a guy in subset that had no match in bigset
478 if (match == false) { return false; }
484 catch(exception& e) {
485 m->errorOut(e, "ConcensusCommand", "isSubset");
489 //**********************************************************************************************************************
490 int ConcensusCommand::findSpot(string node) {
494 //check if each guy in suset is also in bigset
495 for (int i = 0; i < treeSet.size(); i++) {
496 if (treeSet[i] == node) { spot = i; break; }
502 catch(exception& e) {
503 m->errorOut(e, "ConcensusCommand", "findSpot");
507 //**********************************************************************************************************************