]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13
14 ConcensusCommand::ConcensusCommand(string fileroot)  {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 filename = fileroot;
20                 
21                 t = globaldata->gTree;
22         
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 void ConcensusCommand::help(){
33         try {
34                 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
35                 m->mothurOut("The consensus command has no parameters.\n");
36                 m->mothurOut("The consensus command should be in the following format: consensus().\n");
37                 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
38                 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
39                 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
40                 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n");
41                 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");               
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ConcensusCommand", "help");
45                 exit(1);
46         }
47 }
48
49 //**********************************************************************************************************************
50
51 ConcensusCommand::~ConcensusCommand(){}
52
53 //**********************************************************************************************************************
54
55 int ConcensusCommand::execute(){
56         try {
57                 
58                 if (abort == true) { return 0; }
59                 else {  
60                         numNodes = t[0]->getNumNodes();
61                         numLeaves = t[0]->getNumLeaves();
62                 }
63                 
64                 //get the possible pairings
65                 getSets();      
66                 
67                 if (m->control_pressed) { return 0; }
68                 
69                 //open file for pairing not included in the tree
70                 notIncluded = filename + ".cons.pairs";
71                 m->openOutputFile(notIncluded, out2);
72                 
73                 consensusTree = new Tree();
74                 
75                 it2 = nodePairs.find(treeSet);
76                 
77                 nodePairsInTree[treeSet] = it2->second; 
78                 
79                 //erase treeset because you are adding it
80                 nodePairs.erase(treeSet);
81                 
82                 //set count to numLeaves;
83                 count = numLeaves;
84                 
85                 buildConcensusTree(treeSet);
86                 
87                 if (m->control_pressed) { delete consensusTree; return 0; }
88                 
89                 consensusTree->assembleTree();
90                 
91                 if (m->control_pressed) { delete consensusTree; return 0; }
92                 
93                 //output species in order
94                 out2 << "Species in Order: " << endl << endl;
95                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
96                 
97                 //output sets included
98                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
99                 
100                 if (m->control_pressed) { delete consensusTree; return 0; }
101                 
102                 vector<string> temp;
103                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
104                 
105                         if (m->control_pressed) { delete consensusTree; return 0; }
106                         
107                         //only output pairs not leaves
108                         if (it2->first.size() > 1) { 
109                                 temp.clear();
110                                 //initialize temp to all "."
111                                 temp.resize(treeSet.size(), ".");
112                                 
113                                 //set the spot in temp that represents it2->first[i] to a "*" 
114                                 for (int i = 0; i < it2->first.size(); i++) {
115                                         //find spot 
116                                         int index = findSpot(it2->first[i]);
117                                         temp[index] = "*";
118                                         //temp[index] = it2->first[i] + "  ";
119                                 }
120                                 
121                                 //output temp
122                                 for (int j = 0; j < temp.size(); j++) { 
123                                         out2 << temp[j];
124                                 }
125                                 out2 << '\t' << it2->second << endl;
126                         }
127                 }
128                 
129                 //output sets not included
130                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
131                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
132                 
133                         if (m->control_pressed) { delete consensusTree; return 0; }
134                         
135                         temp.clear();
136                         //initialize temp to all "."
137                         temp.resize(treeSet.size(), ".");
138                                 
139                         //set the spot in temp that represents it2->first[i] to a "*" 
140                         for (int i = 0; i < it2->first.size(); i++) {
141                                 //find spot 
142                                 int index = findSpot(it2->first[i]);
143                                 temp[index] = "*";
144                         }
145                                 
146                         //output temp
147                         for (int j = 0; j < temp.size(); j++) { 
148                                 out2 << temp[j];
149                         }
150                         out2 << '\t' << it2->second << endl;
151                 }
152                 
153                 outputFile = filename + ".cons.tre";
154                 m->openOutputFile(outputFile, out);
155                 
156                 consensusTree->printForBoot(out);
157                 
158                 out.close(); out2.close();
159                 
160                 delete consensusTree; 
161                 
162                 return 0;
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "ConcensusCommand", "execute");
166                 exit(1);
167         }
168 }
169
170 //**********************************************************************************************************************
171 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
172         try {
173                 vector<string> leftChildSet;
174                 vector<string> rightChildSet;
175                 
176                 if (m->control_pressed) { return 1; }
177                 
178                 //if you are at a leaf
179                 if (nodeSet.size() == 1) {
180                         //return the vector index of the leaf you are at
181                         return consensusTree->getIndex(nodeSet[0]);
182                 //terminate recursion
183                 }else if (count == numNodes) { return 0; }
184                 else {
185                         //finds best child pair
186                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
187                         int left = buildConcensusTree(leftChildSet);
188                         int right = buildConcensusTree(rightChildSet);
189                         consensusTree->tree[count].setChildren(left, right);
190                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
191                         consensusTree->tree[left].setParent(count);
192                         consensusTree->tree[right].setParent(count);
193                         count++;
194                         return (count-1);
195                 }
196         
197         }
198         catch(exception& e) {
199                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
200                 exit(1);
201         }
202 }
203
204 //**********************************************************************************************************************
205 int ConcensusCommand::getSets() {
206         try {
207                 vector<string> temp;
208                 treeSet.clear();
209                 
210                 //for each tree add the possible pairs you find
211                 for (int i = 0; i < t.size(); i++) {
212                         
213                         //for each non-leaf node get descendant info.
214                         for (int j = numLeaves; j < numNodes; j++) {
215                                 
216                                 if (m->control_pressed) { return 1; }
217                                 
218                                 temp.clear();
219                                 //go through pcounts and pull out descendants
220                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
221                                         temp.push_back(it->first);
222                                 }
223                                 
224                                 //sort temp
225                                 sort(temp.begin(), temp.end());
226                                 
227                                 it2 = nodePairs.find(temp);
228                                 if (it2 != nodePairs.end()) {
229                                         nodePairs[temp]++;
230                                 }else{
231                                         nodePairs[temp] = 1;
232                                 }
233                         }
234                 }
235                 
236                 
237                 //add each leaf to terminate recursion in consensus
238                 //you want the leaves in there but with insignifigant sightings value so it is added last
239                 //for each leaf node get descendant info.
240                 for (int j = 0; j < numLeaves; j++) {
241                 
242                         if (m->control_pressed) { return 1; }
243                         
244                         //only need the first one since leaves have no descendants but themselves
245                         it = t[0]->tree[j].pcount.begin(); 
246                         temp.clear();  temp.push_back(it->first);
247                         
248                         //fill treeSet
249                         treeSet.push_back(it->first);
250                         
251                         //add leaf to list but with sighting value less then all non leaf pairs 
252                         nodePairs[temp] = 0;
253                 }
254
255                 sort(treeSet.begin(), treeSet.end());
256                 
257                 
258                 map< vector<string>, int> nodePairsCopy = nodePairs;
259                 
260                 
261                 //set initial rating on pairs to sightings + subgroup sightings
262                 while (nodePairsCopy.size() != 0) {
263                         if (m->control_pressed) { return 1; }
264                 
265                         vector<string> smallOne = getSmallest(nodePairsCopy);
266                         
267                         int subgrouprate = getSubgroupRating(smallOne);
268                 
269                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
270                         
271                         nodePairsCopy.erase(smallOne);
272                 }
273                 
274                 return 0;
275         }
276         catch(exception& e) {
277                 m->errorOut(e, "ConcensusCommand", "getSets");
278                 exit(1);
279         }
280 }
281 //**********************************************************************************************************************
282 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
283         try{
284                 vector<string> smallest = nodes.begin()->first;
285                 int smallsize = smallest.size();
286                 
287                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
288                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
289                 }
290                 
291                 return smallest;
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "ConcensusCommand", "getSmallest");
295                 exit(1);
296         }
297 }
298
299 //**********************************************************************************************************************
300 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
301         try {
302 //cout << "new call " << endl << endl << endl;
303                 vector<string> largest; largest.clear();
304                 rest.clear();
305                 
306                 //if you are just 2 groups
307                 if (bigset.size() == 2)  {   
308                         rest.push_back(bigset[0]);
309                         largest.push_back(bigset[1]);
310                 }else{
311                         rest = bestSplit[bigset][0];
312                         largest = bestSplit[bigset][1];
313                 }
314                 
315                 
316                 //save for printing out later and for branch lengths
317                 nodePairsInTree[rest] = nodePairs[rest];
318                 
319                 //delete whatever set you return because it is no longer available
320                 nodePairs.erase(rest);
321
322                 //save for printing out later and for branch lengths
323                 nodePairsInTree[largest] = nodePairs[largest];
324                 
325                 //delete whatever set you return because it is no longer available
326                 nodePairs.erase(largest);
327                 
328                 return largest;
329         
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
333                 exit(1);
334         }
335 }
336
337 /**********************************************************************************************************************/
338 int ConcensusCommand::getSubgroupRating(vector<string> group) {
339         try {
340                 map< vector<string>, int>::iterator ittemp;
341                 map< vector< vector<string> > , int >::iterator it3;
342                 int rate = 0;
343                 
344                 // ***********************************************************************************//
345                 //1. this function must be called passing it littlest sets to biggest 
346                 //              since it the rating is made from your sighting plus you best splits rating
347                 //2. it saves the top pair to use later
348                 // ***********************************************************************************//
349
350                 
351                 if (group.size() < 3) {  return rate;  }
352                 
353                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
354                 
355                 //go through the sets
356                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
357                         //are you a subset of bigset, then save in possiblePairings
358                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
359                 }               
360         
361                 map< vector< vector<string> > , int > rating;
362                 
363                 while (possiblePairing.size() != 0) {
364                 
365                         it2 = possiblePairing.begin(); 
366                         vector<string> temprest = getRestSet(group, it2->first);
367                         
368                         //is the rest a set available in possiblePairings
369                         ittemp = possiblePairing.find(temprest);
370                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
371                                 vector< vector<string> > temprate;
372                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
373                                 
374                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
375                                 
376                                 //erase so you dont add 1,2 and 2,1.
377                                 possiblePairing.erase(temprest);
378                         }
379                         
380                         possiblePairing.erase(it2);
381                 }
382
383
384                 it3 = rating.begin();
385                 rate = it3->second;
386                 vector< vector<string> > topPair = it3->first;
387                 
388                 //choose the split with the best rating
389                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
390                         
391                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
392                 }
393                 
394                 bestSplit[group] = topPair;
395                 
396                 return rate;
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
400                 exit(1);
401         }
402 }
403
404 //**********************************************************************************************************************
405 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
406         try {
407                 vector<string> rest;
408                 
409                 for (int i = 0; i < bigset.size(); i++) {
410                         bool inSubset = false;
411                         for (int j = 0; j < subset.size(); j++) {
412                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
413                         }
414                         
415                         //its not in the subset so put it in the rest
416                         if (inSubset == false) { rest.push_back(bigset[i]); }
417                 }
418
419                 return rest;
420         
421         }
422         catch(exception& e) {
423                 m->errorOut(e, "ConcensusCommand", "getRestSet");
424                 exit(1);
425         }
426 }
427
428 //**********************************************************************************************************************
429 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
430         try {
431                 
432         
433                 if (subset.size() > bigset.size()) { return false;  }
434                 
435                 //check if each guy in suset is also in bigset
436                 for (int i = 0; i < subset.size(); i++) {
437                         bool match = false;
438                         for (int j = 0; j < bigset.size(); j++) {
439                                 if (subset[i] == bigset[j]) { match = true; break; }
440                         }
441                         
442                         //you have a guy in subset that had no match in bigset
443                         if (match == false) { return false; }
444                 }
445                 
446                 return true;
447         
448         }
449         catch(exception& e) {
450                 m->errorOut(e, "ConcensusCommand", "isSubset");
451                 exit(1);
452         }
453 }
454 //**********************************************************************************************************************
455 int ConcensusCommand::findSpot(string node) {
456         try {
457                 int spot;
458                 
459                 //check if each guy in suset is also in bigset
460                 for (int i = 0; i < treeSet.size(); i++) {
461                         if (treeSet[i] == node) { spot = i; break; }
462                 }
463                 
464                 return spot;
465         
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "ConcensusCommand", "findSpot");
469                 exit(1);
470         }
471 }
472 //**********************************************************************************************************************
473
474
475
476