]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
dc278017c906dcfeb9f7967bc99f767df7007011
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13
14 ConcensusCommand::ConcensusCommand(string fileroot)  {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 filename = fileroot;
20                 
21                 t = globaldata->gTree;
22         
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 void ConcensusCommand::help(){
33         try {
34                 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
35                 m->mothurOut("The consensus command has no parameters.\n");
36                 m->mothurOut("The consensus command should be in the following format: consensus().\n");
37                 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
38                 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
39                 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
40                 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n");
41                 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");               
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ConcensusCommand", "help");
45                 exit(1);
46         }
47 }
48
49 //**********************************************************************************************************************
50
51 ConcensusCommand::~ConcensusCommand(){}
52
53 //**********************************************************************************************************************
54
55 int ConcensusCommand::execute(){
56         try {
57                 
58                 if (abort == true) { return 0; }
59                 else {  
60                         numNodes = t[0]->getNumNodes();
61                         numLeaves = t[0]->getNumLeaves();
62                 }
63                 
64                 //get the possible pairings
65                 getSets();              
66                 
67                 //open file for pairing not included in the tree
68                 notIncluded = filename + ".cons.pairs";
69                 openOutputFile(notIncluded, out2);
70                 
71                 consensusTree = new Tree();
72                 
73                 it2 = nodePairs.find(treeSet);
74                 
75                 nodePairsInTree[treeSet] = it2->second; 
76                 
77                 //erase treeset because you are adding it
78                 nodePairs.erase(treeSet);
79                 
80                 //set count to numLeaves;
81                 count = numLeaves;
82                 
83                 buildConcensusTree(treeSet);
84                 
85                 consensusTree->assembleTree();
86                 
87                 //output species in order
88                 out2 << "Species in Order: " << endl << endl;
89                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
90                 
91                 //output sets included
92                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
93                 
94                 vector<string> temp;
95                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
96                         //only output pairs not leaves
97                         if (it2->first.size() > 1) { 
98                                 temp.clear();
99                                 //initialize temp to all "."
100                                 temp.resize(treeSet.size(), ".");
101                                 
102                                 //set the spot in temp that represents it2->first[i] to a "*" 
103                                 for (int i = 0; i < it2->first.size(); i++) {
104                                         //find spot 
105                                         int index = findSpot(it2->first[i]);
106                                         temp[index] = "*";
107                                         //temp[index] = it2->first[i] + "  ";
108                                 }
109                                 
110                                 //output temp
111                                 for (int j = 0; j < temp.size(); j++) { 
112                                         out2 << temp[j];
113                                 }
114                                 out2 << '\t' << it2->second << endl;
115                         }
116                 }
117                 
118                 //output sets not included
119                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
120                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
121                         temp.clear();
122                         //initialize temp to all "."
123                         temp.resize(treeSet.size(), ".");
124                                 
125                         //set the spot in temp that represents it2->first[i] to a "*" 
126                         for (int i = 0; i < it2->first.size(); i++) {
127                                 //find spot 
128                                 int index = findSpot(it2->first[i]);
129                                 temp[index] = "*";
130                         }
131                                 
132                         //output temp
133                         for (int j = 0; j < temp.size(); j++) { 
134                                 out2 << temp[j];
135                         }
136                         out2 << '\t' << it2->second << endl;
137                 }
138                 
139                 outputFile = filename + ".cons.tre";
140                 openOutputFile(outputFile, out);
141                 
142                 consensusTree->printForBoot(out);
143                 
144                 out.close(); out2.close();
145                 
146                 delete consensusTree; 
147                 
148                 return 0;
149         }
150         catch(exception& e) {
151                 m->errorOut(e, "ConcensusCommand", "execute");
152                 exit(1);
153         }
154 }
155
156 //**********************************************************************************************************************
157 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
158         try {
159                 vector<string> leftChildSet;
160                 vector<string> rightChildSet;
161                 
162                 //if you are at a leaf
163                 if (nodeSet.size() == 1) {
164                         //return the vector index of the leaf you are at
165                         return consensusTree->getIndex(nodeSet[0]);
166                 //terminate recursion
167                 }else if (count == numNodes) { return 0; }
168                 else {
169                         //finds best child pair
170                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
171                         int left = buildConcensusTree(leftChildSet);
172                         int right = buildConcensusTree(rightChildSet);
173                         consensusTree->tree[count].setChildren(left, right);
174                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
175                         consensusTree->tree[left].setParent(count);
176                         consensusTree->tree[right].setParent(count);
177                         count++;
178                         return (count-1);
179                 }
180         
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
184                 exit(1);
185         }
186 }
187
188 //**********************************************************************************************************************
189 void ConcensusCommand::getSets() {
190         try {
191                 vector<string> temp;
192                 treeSet.clear();
193                 
194                 //for each tree add the possible pairs you find
195                 for (int i = 0; i < t.size(); i++) {
196                         
197                         //for each non-leaf node get descendant info.
198                         for (int j = numLeaves; j < numNodes; j++) {
199                                 temp.clear();
200                                 //go through pcounts and pull out descendants
201                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
202                                         temp.push_back(it->first);
203                                 }
204                                 
205                                 //sort temp
206                                 sort(temp.begin(), temp.end());
207                                 
208                                 it2 = nodePairs.find(temp);
209                                 if (it2 != nodePairs.end()) {
210                                         nodePairs[temp]++;
211                                 }else{
212                                         nodePairs[temp] = 1;
213                                 }
214                         }
215                 }
216                 
217                 
218                 //add each leaf to terminate recursion in consensus
219                 //you want the leaves in there but with insignifigant sightings value so it is added last
220                 //for each leaf node get descendant info.
221                 for (int j = 0; j < numLeaves; j++) {
222                         
223                         //only need the first one since leaves have no descendants but themselves
224                         it = t[0]->tree[j].pcount.begin(); 
225                         temp.clear();  temp.push_back(it->first);
226                         
227                         //fill treeSet
228                         treeSet.push_back(it->first);
229                         
230                         //add leaf to list but with sighting value less then all non leaf pairs 
231                         nodePairs[temp] = 0;
232                 }
233
234                 sort(treeSet.begin(), treeSet.end());
235                 
236                 
237                 map< vector<string>, int> nodePairsCopy = nodePairs;
238                 
239                 
240                 //set initial rating on pairs to sightings + subgroup sightings
241                 while (nodePairsCopy.size() != 0) {
242                 
243                         vector<string> small = getSmallest(nodePairsCopy);
244                         
245                         int subgrouprate = getSubgroupRating(small);
246                 
247                         nodePairsInitialRate[small] = nodePairs[small] + subgrouprate;
248                         
249                         nodePairsCopy.erase(small);
250                 }
251                 
252         }
253         catch(exception& e) {
254                 m->errorOut(e, "ConcensusCommand", "getSets");
255                 exit(1);
256         }
257 }
258 //**********************************************************************************************************************
259 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
260         try{
261                 vector<string> smallest = nodes.begin()->first;
262                 int smallsize = smallest.size();
263                 
264                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
265                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
266                 }
267                 
268                 return smallest;
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "ConcensusCommand", "getSmallest");
272                 exit(1);
273         }
274 }
275
276 //**********************************************************************************************************************
277 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
278         try {
279 //cout << "new call " << endl << endl << endl;
280                 vector<string> largest; largest.clear();
281                 rest.clear();
282                 
283                 //if you are just 2 groups
284                 if (bigset.size() == 2)  {   
285                         rest.push_back(bigset[0]);
286                         largest.push_back(bigset[1]);
287                 }else{
288                         rest = bestSplit[bigset][0];
289                         largest = bestSplit[bigset][1];
290                 }
291                 
292                 
293                 //save for printing out later and for branch lengths
294                 nodePairsInTree[rest] = nodePairs[rest];
295                 
296                 //delete whatever set you return because it is no longer available
297                 nodePairs.erase(rest);
298
299                 //save for printing out later and for branch lengths
300                 nodePairsInTree[largest] = nodePairs[largest];
301                 
302                 //delete whatever set you return because it is no longer available
303                 nodePairs.erase(largest);
304                 
305                 return largest;
306         
307         }
308         catch(exception& e) {
309                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
310                 exit(1);
311         }
312 }
313
314 /**********************************************************************************************************************/
315 int ConcensusCommand::getSubgroupRating(vector<string> group) {
316         try {
317                 map< vector<string>, int>::iterator ittemp;
318                 map< vector< vector<string> > , int >::iterator it3;
319                 int rate = 0;
320                 
321                 // ***********************************************************************************//
322                 //1. this function must be called passing it littlest sets to biggest 
323                 //              since it the rating is made from your sighting plus you best splits rating
324                 //2. it saves the top pair to use later
325                 // ***********************************************************************************//
326
327                 
328                 if (group.size() < 3) {  return rate;  }
329                 
330                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
331                 
332                 //go through the sets
333                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
334                         //are you a subset of bigset, then save in possiblePairings
335                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
336                 }               
337         
338                 map< vector< vector<string> > , int > rating;
339                 
340                 while (possiblePairing.size() != 0) {
341                 
342                         it2 = possiblePairing.begin(); 
343                         vector<string> temprest = getRestSet(group, it2->first);
344                         
345                         //is the rest a set available in possiblePairings
346                         ittemp = possiblePairing.find(temprest);
347                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
348                                 vector< vector<string> > temprate;
349                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
350                                 
351                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
352                                 
353                                 //erase so you dont add 1,2 and 2,1.
354                                 possiblePairing.erase(temprest);
355                         }
356                         
357                         possiblePairing.erase(it2);
358                 }
359
360
361                 it3 = rating.begin();
362                 rate = it3->second;
363                 vector< vector<string> > topPair = it3->first;
364                 
365                 //choose the split with the best rating
366                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
367                         
368                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
369                 }
370                 
371                 bestSplit[group] = topPair;
372                 
373                 return rate;
374         }
375         catch(exception& e) {
376                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
377                 exit(1);
378         }
379 }
380
381 //**********************************************************************************************************************
382 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
383         try {
384                 vector<string> rest;
385                 
386                 for (int i = 0; i < bigset.size(); i++) {
387                         bool inSubset = false;
388                         for (int j = 0; j < subset.size(); j++) {
389                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
390                         }
391                         
392                         //its not in the subset so put it in the rest
393                         if (inSubset == false) { rest.push_back(bigset[i]); }
394                 }
395
396                 return rest;
397         
398         }
399         catch(exception& e) {
400                 m->errorOut(e, "ConcensusCommand", "getRestSet");
401                 exit(1);
402         }
403 }
404
405 //**********************************************************************************************************************
406 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
407         try {
408                 
409         
410                 if (subset.size() > bigset.size()) { return false;  }
411                 
412                 //check if each guy in suset is also in bigset
413                 for (int i = 0; i < subset.size(); i++) {
414                         bool match = false;
415                         for (int j = 0; j < bigset.size(); j++) {
416                                 if (subset[i] == bigset[j]) { match = true; break; }
417                         }
418                         
419                         //you have a guy in subset that had no match in bigset
420                         if (match == false) { return false; }
421                 }
422                 
423                 return true;
424         
425         }
426         catch(exception& e) {
427                 m->errorOut(e, "ConcensusCommand", "isSubset");
428                 exit(1);
429         }
430 }
431 //**********************************************************************************************************************
432 int ConcensusCommand::findSpot(string node) {
433         try {
434                 int spot;
435                 
436                 //check if each guy in suset is also in bigset
437                 for (int i = 0; i < treeSet.size(); i++) {
438                         if (treeSet[i] == node) { spot = i; break; }
439                 }
440                 
441                 return spot;
442         
443         }
444         catch(exception& e) {
445                 m->errorOut(e, "ConcensusCommand", "findSpot");
446                 exit(1);
447         }
448 }
449 //**********************************************************************************************************************
450
451
452
453