5 * Created by Sarah Westcott on 4/29/09.
6 * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
10 #include "consensus.h"
12 //**********************************************************************************************************************
13 Tree* Consensus::getTree(vector<Tree*>& t){
15 numNodes = t[0]->getNumNodes();
16 numLeaves = t[0]->getNumLeaves();
19 //get the possible pairings
22 if (m->control_pressed) { return 0; }
24 consensusTree = new Tree(t[0]->getTreeMap());
26 it2 = nodePairs.find(treeSet);
28 nodePairsInTree[treeSet] = it2->second;
30 //erase treeset because you are adding it
31 nodePairs.erase(treeSet);
33 //set count to numLeaves;
36 buildConsensusTree(treeSet);
38 if (m->control_pressed) { delete consensusTree; return 0; }
40 map<string, string> empty;
41 consensusTree->assembleTree(empty);
43 if (m->control_pressed) { delete consensusTree; return 0; }
50 m->errorOut(e, "Consensus", "execute");
54 //**********************************************************************************************************************
55 int Consensus::printSetsInfo() {
57 //open file for pairing not included in the tree
58 string notIncluded = "cons.pairs";
60 m->openOutputFile(notIncluded, out2);
62 //output species in order
63 out2 << "Species in Order: " << endl << endl;
64 for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
66 //output sets included
67 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
69 if (m->control_pressed) { return 0; }
72 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
74 if (m->control_pressed) { return 0; }
76 //only output pairs not leaves
77 if (it2->first.size() > 1) {
79 //initialize temp to all "."
80 temp.resize(treeSet.size(), ".");
82 //set the spot in temp that represents it2->first[i] to a "*"
83 for (int i = 0; i < it2->first.size(); i++) {
85 int index = findSpot(it2->first[i]);
87 //temp[index] = it2->first[i] + " ";
91 for (int j = 0; j < temp.size(); j++) {
94 out2 << '\t' << it2->second << endl;
98 //output sets not included
99 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
100 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
102 if (m->control_pressed) { return 0; }
105 //initialize temp to all "."
106 temp.resize(treeSet.size(), ".");
108 //set the spot in temp that represents it2->first[i] to a "*"
109 for (int i = 0; i < it2->first.size(); i++) {
111 int index = findSpot(it2->first[i]);
116 for (int j = 0; j < temp.size(); j++) {
119 out2 << '\t' << it2->second << endl;
124 catch(exception& e) {
125 m->errorOut(e, "Consensus", "printSetsInfo");
129 //**********************************************************************************************************************
130 int Consensus::buildConsensusTree(vector<string> nodeSet) {
132 vector<string> leftChildSet;
133 vector<string> rightChildSet;
135 if (m->control_pressed) { return 1; }
137 //if you are at a leaf
138 if (nodeSet.size() == 1) {
139 //return the vector index of the leaf you are at
140 return consensusTree->getIndex(nodeSet[0]);
141 //terminate recursion
142 }else if (count == numNodes) { return 0; }
144 //finds best child pair
145 leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
146 int left = buildConsensusTree(leftChildSet);
147 int right = buildConsensusTree(rightChildSet);
148 consensusTree->tree[count].setChildren(left, right);
149 consensusTree->tree[count].setLabel((nodePairsInTree[nodeSet]/(float)numTrees));
150 consensusTree->tree[left].setParent(count);
151 consensusTree->tree[right].setParent(count);
157 catch(exception& e) {
158 m->errorOut(e, "Consensus", "buildConcensusTree");
163 //**********************************************************************************************************************
164 int Consensus::getSets(vector<Tree*>& t) {
169 //for each tree add the possible pairs you find
170 for (int i = 0; i < t.size(); i++) {
172 //for each non-leaf node get descendant info.
173 for (int j = numLeaves; j < numNodes; j++) {
175 if (m->control_pressed) { return 1; }
178 //go through pcounts and pull out descendants
179 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
180 temp.push_back(it->first);
184 sort(temp.begin(), temp.end());
186 it2 = nodePairs.find(temp);
187 if (it2 != nodePairs.end()) {
196 //add each leaf to terminate recursion in consensus
197 //you want the leaves in there but with insignifigant sightings value so it is added last
198 //for each leaf node get descendant info.
199 for (int j = 0; j < numLeaves; j++) {
201 if (m->control_pressed) { return 1; }
203 //only need the first one since leaves have no descendants but themselves
204 it = t[0]->tree[j].pcount.begin();
205 temp.clear(); temp.push_back(it->first);
208 treeSet.push_back(it->first);
210 //add leaf to list but with sighting value less then all non leaf pairs
214 sort(treeSet.begin(), treeSet.end());
217 map< vector<string>, int> nodePairsCopy = nodePairs;
220 //set initial rating on pairs to sightings + subgroup sightings
221 while (nodePairsCopy.size() != 0) {
222 if (m->control_pressed) { return 1; }
224 vector<string> smallOne = getSmallest(nodePairsCopy);
226 int subgrouprate = getSubgroupRating(smallOne);
228 nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
230 nodePairsCopy.erase(smallOne);
235 catch(exception& e) {
236 m->errorOut(e, "Consensus", "getSets");
240 //**********************************************************************************************************************
241 vector<string> Consensus::getSmallest(map< vector<string>, int> nodes) {
243 vector<string> smallest = nodes.begin()->first;
244 int smallsize = smallest.size();
246 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
247 if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
252 catch(exception& e) {
253 m->errorOut(e, "Consensus", "getSmallest");
258 //**********************************************************************************************************************
259 vector<string> Consensus::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
261 //cout << "new call " << endl << endl << endl;
262 vector<string> largest; largest.clear();
265 //if you are just 2 groups
266 if (bigset.size() == 2) {
267 rest.push_back(bigset[0]);
268 largest.push_back(bigset[1]);
270 rest = bestSplit[bigset][0];
271 largest = bestSplit[bigset][1];
275 //save for printing out later and for branch lengths
276 nodePairsInTree[rest] = nodePairs[rest];
278 //delete whatever set you return because it is no longer available
279 nodePairs.erase(rest);
281 //save for printing out later and for branch lengths
282 nodePairsInTree[largest] = nodePairs[largest];
284 //delete whatever set you return because it is no longer available
285 nodePairs.erase(largest);
290 catch(exception& e) {
291 m->errorOut(e, "Consensus", "getNextAvailableSet");
296 /**********************************************************************************************************************/
297 int Consensus::getSubgroupRating(vector<string> group) {
299 map< vector<string>, int>::iterator ittemp;
300 map< vector< vector<string> > , int >::iterator it3;
303 // ***********************************************************************************//
304 //1. this function must be called passing it littlest sets to biggest
305 // since it the rating is made from your sighting plus you best splits rating
306 //2. it saves the top pair to use later
307 // ***********************************************************************************//
310 if (group.size() < 3) { return rate; }
312 map< vector<string>, int> possiblePairing; //this is all the subsets of group
314 //go through the sets
315 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
316 //are you a subset of bigset, then save in possiblePairings
317 if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
320 map< vector< vector<string> > , int > rating;
322 while (possiblePairing.size() != 0) {
324 it2 = possiblePairing.begin();
325 vector<string> temprest = getRestSet(group, it2->first);
327 //is the rest a set available in possiblePairings
328 ittemp = possiblePairing.find(temprest);
329 if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
330 vector< vector<string> > temprate;
331 temprate.push_back(it2->first); temprate.push_back(temprest);
333 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
335 //erase so you dont add 1,2 and 2,1.
336 possiblePairing.erase(temprest);
339 possiblePairing.erase(it2);
343 it3 = rating.begin();
345 vector< vector<string> > topPair = it3->first;
347 //choose the split with the best rating
348 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
350 if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
353 bestSplit[group] = topPair;
357 catch(exception& e) {
358 m->errorOut(e, "Consensus", "getSubgroupRating");
363 //**********************************************************************************************************************
364 vector<string> Consensus::getRestSet(vector<string> bigset, vector<string> subset) {
368 for (int i = 0; i < bigset.size(); i++) {
369 bool inSubset = false;
370 for (int j = 0; j < subset.size(); j++) {
371 if (bigset[i] == subset[j]) { inSubset = true; break; }
374 //its not in the subset so put it in the rest
375 if (inSubset == false) { rest.push_back(bigset[i]); }
381 catch(exception& e) {
382 m->errorOut(e, "Consensus", "getRestSet");
387 //**********************************************************************************************************************
388 bool Consensus::isSubset(vector<string> bigset, vector<string> subset) {
392 if (subset.size() > bigset.size()) { return false; }
394 //check if each guy in suset is also in bigset
395 for (int i = 0; i < subset.size(); i++) {
397 for (int j = 0; j < bigset.size(); j++) {
398 if (subset[i] == bigset[j]) { match = true; break; }
401 //you have a guy in subset that had no match in bigset
402 if (match == false) { return false; }
408 catch(exception& e) {
409 m->errorOut(e, "Consensus", "isSubset");
413 //**********************************************************************************************************************
414 int Consensus::findSpot(string node) {
418 //check if each guy in suset is also in bigset
419 for (int i = 0; i < treeSet.size(); i++) {
420 if (treeSet[i] == node) { spot = i; break; }
426 catch(exception& e) {
427 m->errorOut(e, "Consensus", "findSpot");
431 //**********************************************************************************************************************