5 * Created by Sarah Westcott on 4/29/09.
6 * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
10 #include "consensuscommand.h"
12 //**********************************************************************************************************************
14 ConcensusCommand::ConcensusCommand(string fileroot) {
16 globaldata = GlobalData::getInstance();
21 t = globaldata->gTree;
25 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
30 //**********************************************************************************************************************
32 void ConcensusCommand::help(){
34 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
35 m->mothurOut("The consensus command has no parameters.\n");
36 m->mothurOut("The consensus command should be in the following format: consensus().\n");
37 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
38 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
39 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
40 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the consensus tree \n");
41 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");
44 m->errorOut(e, "ConcensusCommand", "help");
49 //**********************************************************************************************************************
51 ConcensusCommand::~ConcensusCommand(){}
53 //**********************************************************************************************************************
55 int ConcensusCommand::execute(){
58 if (abort == true) { return 0; }
60 numNodes = t[0]->getNumNodes();
61 numLeaves = t[0]->getNumLeaves();
64 //get the possible pairings
67 if (m->control_pressed) { return 0; }
69 //open file for pairing not included in the tree
70 notIncluded = filename + ".cons.pairs";
71 openOutputFile(notIncluded, out2);
73 consensusTree = new Tree();
75 it2 = nodePairs.find(treeSet);
77 nodePairsInTree[treeSet] = it2->second;
79 //erase treeset because you are adding it
80 nodePairs.erase(treeSet);
82 //set count to numLeaves;
85 buildConcensusTree(treeSet);
87 if (m->control_pressed) { delete consensusTree; return 0; }
89 consensusTree->assembleTree();
91 if (m->control_pressed) { delete consensusTree; return 0; }
93 //output species in order
94 out2 << "Species in Order: " << endl << endl;
95 for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
97 //output sets included
98 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
100 if (m->control_pressed) { delete consensusTree; return 0; }
103 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
105 if (m->control_pressed) { delete consensusTree; return 0; }
107 //only output pairs not leaves
108 if (it2->first.size() > 1) {
110 //initialize temp to all "."
111 temp.resize(treeSet.size(), ".");
113 //set the spot in temp that represents it2->first[i] to a "*"
114 for (int i = 0; i < it2->first.size(); i++) {
116 int index = findSpot(it2->first[i]);
118 //temp[index] = it2->first[i] + " ";
122 for (int j = 0; j < temp.size(); j++) {
125 out2 << '\t' << it2->second << endl;
129 //output sets not included
130 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
131 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
133 if (m->control_pressed) { delete consensusTree; return 0; }
136 //initialize temp to all "."
137 temp.resize(treeSet.size(), ".");
139 //set the spot in temp that represents it2->first[i] to a "*"
140 for (int i = 0; i < it2->first.size(); i++) {
142 int index = findSpot(it2->first[i]);
147 for (int j = 0; j < temp.size(); j++) {
150 out2 << '\t' << it2->second << endl;
153 outputFile = filename + ".cons.tre";
154 openOutputFile(outputFile, out);
156 consensusTree->printForBoot(out);
158 out.close(); out2.close();
160 delete consensusTree;
164 catch(exception& e) {
165 m->errorOut(e, "ConcensusCommand", "execute");
170 //**********************************************************************************************************************
171 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
173 vector<string> leftChildSet;
174 vector<string> rightChildSet;
176 if (m->control_pressed) { return 1; }
178 //if you are at a leaf
179 if (nodeSet.size() == 1) {
180 //return the vector index of the leaf you are at
181 return consensusTree->getIndex(nodeSet[0]);
182 //terminate recursion
183 }else if (count == numNodes) { return 0; }
185 //finds best child pair
186 leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
187 int left = buildConcensusTree(leftChildSet);
188 int right = buildConcensusTree(rightChildSet);
189 consensusTree->tree[count].setChildren(left, right);
190 consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]);
191 consensusTree->tree[left].setParent(count);
192 consensusTree->tree[right].setParent(count);
198 catch(exception& e) {
199 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
204 //**********************************************************************************************************************
205 int ConcensusCommand::getSets() {
210 //for each tree add the possible pairs you find
211 for (int i = 0; i < t.size(); i++) {
213 //for each non-leaf node get descendant info.
214 for (int j = numLeaves; j < numNodes; j++) {
216 if (m->control_pressed) { return 1; }
219 //go through pcounts and pull out descendants
220 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
221 temp.push_back(it->first);
225 sort(temp.begin(), temp.end());
227 it2 = nodePairs.find(temp);
228 if (it2 != nodePairs.end()) {
237 //add each leaf to terminate recursion in consensus
238 //you want the leaves in there but with insignifigant sightings value so it is added last
239 //for each leaf node get descendant info.
240 for (int j = 0; j < numLeaves; j++) {
242 if (m->control_pressed) { return 1; }
244 //only need the first one since leaves have no descendants but themselves
245 it = t[0]->tree[j].pcount.begin();
246 temp.clear(); temp.push_back(it->first);
249 treeSet.push_back(it->first);
251 //add leaf to list but with sighting value less then all non leaf pairs
255 sort(treeSet.begin(), treeSet.end());
258 map< vector<string>, int> nodePairsCopy = nodePairs;
261 //set initial rating on pairs to sightings + subgroup sightings
262 while (nodePairsCopy.size() != 0) {
263 if (m->control_pressed) { return 1; }
265 vector<string> small = getSmallest(nodePairsCopy);
267 int subgrouprate = getSubgroupRating(small);
269 nodePairsInitialRate[small] = nodePairs[small] + subgrouprate;
271 nodePairsCopy.erase(small);
276 catch(exception& e) {
277 m->errorOut(e, "ConcensusCommand", "getSets");
281 //**********************************************************************************************************************
282 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
284 vector<string> smallest = nodes.begin()->first;
285 int smallsize = smallest.size();
287 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
288 if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
293 catch(exception& e) {
294 m->errorOut(e, "ConcensusCommand", "getSmallest");
299 //**********************************************************************************************************************
300 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
302 //cout << "new call " << endl << endl << endl;
303 vector<string> largest; largest.clear();
306 //if you are just 2 groups
307 if (bigset.size() == 2) {
308 rest.push_back(bigset[0]);
309 largest.push_back(bigset[1]);
311 rest = bestSplit[bigset][0];
312 largest = bestSplit[bigset][1];
316 //save for printing out later and for branch lengths
317 nodePairsInTree[rest] = nodePairs[rest];
319 //delete whatever set you return because it is no longer available
320 nodePairs.erase(rest);
322 //save for printing out later and for branch lengths
323 nodePairsInTree[largest] = nodePairs[largest];
325 //delete whatever set you return because it is no longer available
326 nodePairs.erase(largest);
331 catch(exception& e) {
332 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
337 /**********************************************************************************************************************/
338 int ConcensusCommand::getSubgroupRating(vector<string> group) {
340 map< vector<string>, int>::iterator ittemp;
341 map< vector< vector<string> > , int >::iterator it3;
344 // ***********************************************************************************//
345 //1. this function must be called passing it littlest sets to biggest
346 // since it the rating is made from your sighting plus you best splits rating
347 //2. it saves the top pair to use later
348 // ***********************************************************************************//
351 if (group.size() < 3) { return rate; }
353 map< vector<string>, int> possiblePairing; //this is all the subsets of group
355 //go through the sets
356 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
357 //are you a subset of bigset, then save in possiblePairings
358 if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
361 map< vector< vector<string> > , int > rating;
363 while (possiblePairing.size() != 0) {
365 it2 = possiblePairing.begin();
366 vector<string> temprest = getRestSet(group, it2->first);
368 //is the rest a set available in possiblePairings
369 ittemp = possiblePairing.find(temprest);
370 if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
371 vector< vector<string> > temprate;
372 temprate.push_back(it2->first); temprate.push_back(temprest);
374 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
376 //erase so you dont add 1,2 and 2,1.
377 possiblePairing.erase(temprest);
380 possiblePairing.erase(it2);
384 it3 = rating.begin();
386 vector< vector<string> > topPair = it3->first;
388 //choose the split with the best rating
389 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
391 if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
394 bestSplit[group] = topPair;
398 catch(exception& e) {
399 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
404 //**********************************************************************************************************************
405 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
409 for (int i = 0; i < bigset.size(); i++) {
410 bool inSubset = false;
411 for (int j = 0; j < subset.size(); j++) {
412 if (bigset[i] == subset[j]) { inSubset = true; break; }
415 //its not in the subset so put it in the rest
416 if (inSubset == false) { rest.push_back(bigset[i]); }
422 catch(exception& e) {
423 m->errorOut(e, "ConcensusCommand", "getRestSet");
428 //**********************************************************************************************************************
429 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
433 if (subset.size() > bigset.size()) { return false; }
435 //check if each guy in suset is also in bigset
436 for (int i = 0; i < subset.size(); i++) {
438 for (int j = 0; j < bigset.size(); j++) {
439 if (subset[i] == bigset[j]) { match = true; break; }
442 //you have a guy in subset that had no match in bigset
443 if (match == false) { return false; }
449 catch(exception& e) {
450 m->errorOut(e, "ConcensusCommand", "isSubset");
454 //**********************************************************************************************************************
455 int ConcensusCommand::findSpot(string node) {
459 //check if each guy in suset is also in bigset
460 for (int i = 0; i < treeSet.size(); i++) {
461 if (treeSet[i] == node) { spot = i; break; }
467 catch(exception& e) {
468 m->errorOut(e, "ConcensusCommand", "findSpot");
472 //**********************************************************************************************************************