5 * Created by Sarah Westcott on 4/29/09.
6 * Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
10 #include "consensuscommand.h"
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::getValidParameters(){
15 vector<string> myArray;
19 m->errorOut(e, "ConcensusCommand", "getValidParameters");
23 //**********************************************************************************************************************
24 vector<string> ConcensusCommand::getRequiredParameters(){
26 vector<string> myArray;
30 m->errorOut(e, "ConcensusCommand", "getRequiredParameters");
34 //**********************************************************************************************************************
35 vector<string> ConcensusCommand::getRequiredFiles(){
37 string AlignArray[] = {"tree","group"};
38 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
42 m->errorOut(e, "ConcensusCommand", "getRequiredFiles");
46 //**********************************************************************************************************************
47 ConcensusCommand::ConcensusCommand(){
49 abort = true; calledHelp = true;
50 vector<string> tempOutNames;
51 outputTypes["tree"] = tempOutNames;
52 outputTypes["nodepairs"] = tempOutNames;
55 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
59 //**********************************************************************************************************************
61 ConcensusCommand::ConcensusCommand(string fileroot) {
63 globaldata = GlobalData::getInstance();
64 abort = false; calledHelp = false;
66 //initialize outputTypes
67 vector<string> tempOutNames;
68 outputTypes["tree"] = tempOutNames;
69 outputTypes["nodepairs"] = tempOutNames;
73 t = globaldata->gTree;
77 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
82 //**********************************************************************************************************************
84 void ConcensusCommand::help(){
86 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
87 m->mothurOut("The consensus command has no parameters.\n");
88 m->mothurOut("The consensus command should be in the following format: consensus().\n");
89 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
90 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
91 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
92 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree. For each node, the pair that was used in the consensus tree \n");
93 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");
96 m->errorOut(e, "ConcensusCommand", "help");
101 //**********************************************************************************************************************
103 ConcensusCommand::~ConcensusCommand(){}
105 //**********************************************************************************************************************
107 int ConcensusCommand::execute(){
110 if (abort == true) { if (calledHelp) { return 0; } return 2; }
112 numNodes = t[0]->getNumNodes();
113 numLeaves = t[0]->getNumLeaves();
116 //get the possible pairings
119 if (m->control_pressed) { return 0; }
121 //open file for pairing not included in the tree
122 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded); outputTypes["nodepairs"].push_back(notIncluded);
123 m->openOutputFile(notIncluded, out2);
125 consensusTree = new Tree();
127 it2 = nodePairs.find(treeSet);
129 nodePairsInTree[treeSet] = it2->second;
131 //erase treeset because you are adding it
132 nodePairs.erase(treeSet);
134 //set count to numLeaves;
137 buildConcensusTree(treeSet);
139 if (m->control_pressed) { delete consensusTree; return 0; }
141 consensusTree->assembleTree();
143 if (m->control_pressed) { delete consensusTree; return 0; }
145 //output species in order
146 out2 << "Species in Order: " << endl << endl;
147 for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
149 //output sets included
150 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
152 if (m->control_pressed) { delete consensusTree; return 0; }
155 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
157 if (m->control_pressed) { delete consensusTree; return 0; }
159 //only output pairs not leaves
160 if (it2->first.size() > 1) {
162 //initialize temp to all "."
163 temp.resize(treeSet.size(), ".");
165 //set the spot in temp that represents it2->first[i] to a "*"
166 for (int i = 0; i < it2->first.size(); i++) {
168 int index = findSpot(it2->first[i]);
170 //temp[index] = it2->first[i] + " ";
174 for (int j = 0; j < temp.size(); j++) {
177 out2 << '\t' << it2->second << endl;
181 //output sets not included
182 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
183 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
185 if (m->control_pressed) { delete consensusTree; return 0; }
188 //initialize temp to all "."
189 temp.resize(treeSet.size(), ".");
191 //set the spot in temp that represents it2->first[i] to a "*"
192 for (int i = 0; i < it2->first.size(); i++) {
194 int index = findSpot(it2->first[i]);
199 for (int j = 0; j < temp.size(); j++) {
202 out2 << '\t' << it2->second << endl;
205 outputFile = filename + ".cons.tre"; outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
206 m->openOutputFile(outputFile, out);
208 consensusTree->print(out, "boot");
210 out.close(); out2.close();
212 delete consensusTree;
214 //set first tree file as new current treefile
215 string currentTree = "";
216 itTypes = outputTypes.find("tree");
217 if (itTypes != outputTypes.end()) {
218 if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
223 catch(exception& e) {
224 m->errorOut(e, "ConcensusCommand", "execute");
229 //**********************************************************************************************************************
230 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
232 vector<string> leftChildSet;
233 vector<string> rightChildSet;
235 if (m->control_pressed) { return 1; }
237 //if you are at a leaf
238 if (nodeSet.size() == 1) {
239 //return the vector index of the leaf you are at
240 return consensusTree->getIndex(nodeSet[0]);
241 //terminate recursion
242 }else if (count == numNodes) { return 0; }
244 //finds best child pair
245 leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
246 int left = buildConcensusTree(leftChildSet);
247 int right = buildConcensusTree(rightChildSet);
248 consensusTree->tree[count].setChildren(left, right);
249 consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]);
250 consensusTree->tree[left].setParent(count);
251 consensusTree->tree[right].setParent(count);
257 catch(exception& e) {
258 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
263 //**********************************************************************************************************************
264 int ConcensusCommand::getSets() {
269 //for each tree add the possible pairs you find
270 for (int i = 0; i < t.size(); i++) {
272 //for each non-leaf node get descendant info.
273 for (int j = numLeaves; j < numNodes; j++) {
275 if (m->control_pressed) { return 1; }
278 //go through pcounts and pull out descendants
279 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
280 temp.push_back(it->first);
284 sort(temp.begin(), temp.end());
286 it2 = nodePairs.find(temp);
287 if (it2 != nodePairs.end()) {
296 //add each leaf to terminate recursion in consensus
297 //you want the leaves in there but with insignifigant sightings value so it is added last
298 //for each leaf node get descendant info.
299 for (int j = 0; j < numLeaves; j++) {
301 if (m->control_pressed) { return 1; }
303 //only need the first one since leaves have no descendants but themselves
304 it = t[0]->tree[j].pcount.begin();
305 temp.clear(); temp.push_back(it->first);
308 treeSet.push_back(it->first);
310 //add leaf to list but with sighting value less then all non leaf pairs
314 sort(treeSet.begin(), treeSet.end());
317 map< vector<string>, int> nodePairsCopy = nodePairs;
320 //set initial rating on pairs to sightings + subgroup sightings
321 while (nodePairsCopy.size() != 0) {
322 if (m->control_pressed) { return 1; }
324 vector<string> smallOne = getSmallest(nodePairsCopy);
326 int subgrouprate = getSubgroupRating(smallOne);
328 nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
330 nodePairsCopy.erase(smallOne);
335 catch(exception& e) {
336 m->errorOut(e, "ConcensusCommand", "getSets");
340 //**********************************************************************************************************************
341 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
343 vector<string> smallest = nodes.begin()->first;
344 int smallsize = smallest.size();
346 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
347 if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
352 catch(exception& e) {
353 m->errorOut(e, "ConcensusCommand", "getSmallest");
358 //**********************************************************************************************************************
359 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
361 //cout << "new call " << endl << endl << endl;
362 vector<string> largest; largest.clear();
365 //if you are just 2 groups
366 if (bigset.size() == 2) {
367 rest.push_back(bigset[0]);
368 largest.push_back(bigset[1]);
370 rest = bestSplit[bigset][0];
371 largest = bestSplit[bigset][1];
375 //save for printing out later and for branch lengths
376 nodePairsInTree[rest] = nodePairs[rest];
378 //delete whatever set you return because it is no longer available
379 nodePairs.erase(rest);
381 //save for printing out later and for branch lengths
382 nodePairsInTree[largest] = nodePairs[largest];
384 //delete whatever set you return because it is no longer available
385 nodePairs.erase(largest);
390 catch(exception& e) {
391 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
396 /**********************************************************************************************************************/
397 int ConcensusCommand::getSubgroupRating(vector<string> group) {
399 map< vector<string>, int>::iterator ittemp;
400 map< vector< vector<string> > , int >::iterator it3;
403 // ***********************************************************************************//
404 //1. this function must be called passing it littlest sets to biggest
405 // since it the rating is made from your sighting plus you best splits rating
406 //2. it saves the top pair to use later
407 // ***********************************************************************************//
410 if (group.size() < 3) { return rate; }
412 map< vector<string>, int> possiblePairing; //this is all the subsets of group
414 //go through the sets
415 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
416 //are you a subset of bigset, then save in possiblePairings
417 if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
420 map< vector< vector<string> > , int > rating;
422 while (possiblePairing.size() != 0) {
424 it2 = possiblePairing.begin();
425 vector<string> temprest = getRestSet(group, it2->first);
427 //is the rest a set available in possiblePairings
428 ittemp = possiblePairing.find(temprest);
429 if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
430 vector< vector<string> > temprate;
431 temprate.push_back(it2->first); temprate.push_back(temprest);
433 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
435 //erase so you dont add 1,2 and 2,1.
436 possiblePairing.erase(temprest);
439 possiblePairing.erase(it2);
443 it3 = rating.begin();
445 vector< vector<string> > topPair = it3->first;
447 //choose the split with the best rating
448 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
450 if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
453 bestSplit[group] = topPair;
457 catch(exception& e) {
458 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
463 //**********************************************************************************************************************
464 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
468 for (int i = 0; i < bigset.size(); i++) {
469 bool inSubset = false;
470 for (int j = 0; j < subset.size(); j++) {
471 if (bigset[i] == subset[j]) { inSubset = true; break; }
474 //its not in the subset so put it in the rest
475 if (inSubset == false) { rest.push_back(bigset[i]); }
481 catch(exception& e) {
482 m->errorOut(e, "ConcensusCommand", "getRestSet");
487 //**********************************************************************************************************************
488 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
492 if (subset.size() > bigset.size()) { return false; }
494 //check if each guy in suset is also in bigset
495 for (int i = 0; i < subset.size(); i++) {
497 for (int j = 0; j < bigset.size(); j++) {
498 if (subset[i] == bigset[j]) { match = true; break; }
501 //you have a guy in subset that had no match in bigset
502 if (match == false) { return false; }
508 catch(exception& e) {
509 m->errorOut(e, "ConcensusCommand", "isSubset");
513 //**********************************************************************************************************************
514 int ConcensusCommand::findSpot(string node) {
518 //check if each guy in suset is also in bigset
519 for (int i = 0; i < treeSet.size(); i++) {
520 if (treeSet[i] == node) { spot = i; break; }
526 catch(exception& e) {
527 m->errorOut(e, "ConcensusCommand", "findSpot");
531 //**********************************************************************************************************************