]> git.donarmstrong.com Git - mothur.git/blob - consensus.cpp
updated kruskalwallis struct
[mothur.git] / consensus.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensus.h"
11
12 //**********************************************************************************************************************
13 Tree* Consensus::getTree(vector<Tree*>& t){
14         try {
15                 numNodes = t[0]->getNumNodes();
16                 numLeaves = t[0]->getNumLeaves();
17         numTrees = t.size();
18                 
19                 //get the possible pairings
20                 getSets(t);     
21                 
22                 if (m->control_pressed) { return 0; }
23                 
24                 consensusTree = new Tree(t[0]->getTreeMap());
25                 
26                 it2 = nodePairs.find(treeSet);
27                 
28                 nodePairsInTree[treeSet] = it2->second; 
29                 
30                 //erase treeset because you are adding it
31                 nodePairs.erase(treeSet);
32                 
33                 //set count to numLeaves;
34                 count = numLeaves;
35                 
36                 buildConsensusTree(treeSet);
37                 
38                 if (m->control_pressed) {  delete consensusTree; return 0; }
39                 
40         map<string, string> empty;
41                 consensusTree->assembleTree(empty);
42                 
43                 if (m->control_pressed) {  delete consensusTree; return 0; }
44                                 
45                 return consensusTree; 
46                 
47                 return 0;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "Consensus", "execute");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 int Consensus::printSetsInfo() {
56         try {
57         //open file for pairing not included in the tree
58                 string notIncluded = "cons.pairs";  
59                 ofstream out2;
60         m->openOutputFile(notIncluded, out2);
61
62         //output species in order
63                 out2 << "Species in Order: " << endl << endl;
64                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
65                 
66                 //output sets included
67                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
68                 
69                 if (m->control_pressed) {  return 0; }
70                 
71                 vector<string> temp;
72                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
73             
74                         if (m->control_pressed) {  return 0; }
75                         
76                         //only output pairs not leaves
77                         if (it2->first.size() > 1) { 
78                                 temp.clear();
79                                 //initialize temp to all "."
80                                 temp.resize(treeSet.size(), ".");
81                                 
82                                 //set the spot in temp that represents it2->first[i] to a "*" 
83                                 for (int i = 0; i < it2->first.size(); i++) {
84                                         //find spot 
85                                         int index = findSpot(it2->first[i]);
86                                         temp[index] = "*";
87                                         //temp[index] = it2->first[i] + "  ";
88                                 }
89                                 
90                                 //output temp
91                                 for (int j = 0; j < temp.size(); j++) { 
92                                         out2 << temp[j];
93                                 }
94                                 out2 << '\t' << it2->second << endl;
95                         }
96                 }
97                 
98                 //output sets not included
99                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
100                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
101             
102                         if (m->control_pressed) { return 0; }
103                         
104                         temp.clear();
105                         //initialize temp to all "."
106                         temp.resize(treeSet.size(), ".");
107             
108                         //set the spot in temp that represents it2->first[i] to a "*" 
109                         for (int i = 0; i < it2->first.size(); i++) {
110                                 //find spot 
111                                 int index = findSpot(it2->first[i]);
112                                 temp[index] = "*";
113                         }
114             
115                         //output temp
116                         for (int j = 0; j < temp.size(); j++) { 
117                                 out2 << temp[j];
118                         }
119                         out2 << '\t' << it2->second << endl;
120                 }
121         
122         return 0;
123     }
124         catch(exception& e) {
125                 m->errorOut(e, "Consensus", "printSetsInfo");
126                 exit(1);
127         }
128 }      
129 //**********************************************************************************************************************
130 int Consensus::buildConsensusTree(vector<string> nodeSet) {
131         try {
132                 vector<string> leftChildSet;
133                 vector<string> rightChildSet;
134                 
135                 if (m->control_pressed) { return 1; }
136                 
137                 //if you are at a leaf
138                 if (nodeSet.size() == 1) {
139                         //return the vector index of the leaf you are at
140                         return consensusTree->getIndex(nodeSet[0]);
141                 //terminate recursion
142                 }else if (count == numNodes) { return 0; }
143                 else {
144                         //finds best child pair
145                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
146                         int left = buildConsensusTree(leftChildSet);
147                         int right = buildConsensusTree(rightChildSet);
148                         consensusTree->tree[count].setChildren(left, right);
149                         consensusTree->tree[count].setLabel((nodePairsInTree[nodeSet]/(float)numTrees)); 
150                         consensusTree->tree[left].setParent(count);
151                         consensusTree->tree[right].setParent(count);
152                         count++;
153                         return (count-1);
154                 }
155         
156         }
157         catch(exception& e) {
158                 m->errorOut(e, "Consensus", "buildConcensusTree");
159                 exit(1);
160         }
161 }
162
163 //**********************************************************************************************************************
164 int Consensus::getSets(vector<Tree*>& t) {
165         try {
166                 vector<string> temp;
167                 treeSet.clear();
168                 
169                 //for each tree add the possible pairs you find
170                 for (int i = 0; i < t.size(); i++) {
171                         
172                         //for each non-leaf node get descendant info.
173                         for (int j = numLeaves; j < numNodes; j++) {
174                                 
175                                 if (m->control_pressed) { return 1; }
176                                 
177                                 temp.clear();
178                                 //go through pcounts and pull out descendants
179                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
180                                         temp.push_back(it->first);
181                                 }
182                                 
183                                 //sort temp
184                                 sort(temp.begin(), temp.end());
185                                 
186                                 it2 = nodePairs.find(temp);
187                                 if (it2 != nodePairs.end()) {
188                                         nodePairs[temp]++;
189                                 }else{
190                                         nodePairs[temp] = 1;
191                                 }
192                         }
193                 }
194                 
195                 
196                 //add each leaf to terminate recursion in consensus
197                 //you want the leaves in there but with insignifigant sightings value so it is added last
198                 //for each leaf node get descendant info.
199                 for (int j = 0; j < numLeaves; j++) {
200                 
201                         if (m->control_pressed) { return 1; }
202             
203                         //only need the first one since leaves have no descendants but themselves
204                         it = t[0]->tree[j].pcount.begin(); 
205                         temp.clear();  temp.push_back(it->first);
206                         
207                         //fill treeSet
208                         treeSet.push_back(it->first);
209                         
210                         //add leaf to list but with sighting value less then all non leaf pairs 
211                         nodePairs[temp] = 0;
212                 }
213
214                 sort(treeSet.begin(), treeSet.end());
215                 
216                 
217                 map< vector<string>, int> nodePairsCopy = nodePairs;
218                 
219                 
220                 //set initial rating on pairs to sightings + subgroup sightings
221                 while (nodePairsCopy.size() != 0) {
222                         if (m->control_pressed) { return 1; }
223                 
224                         vector<string> smallOne = getSmallest(nodePairsCopy);
225                         
226                         int subgrouprate = getSubgroupRating(smallOne);
227                 
228                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
229                         
230                         nodePairsCopy.erase(smallOne);
231                 }
232                 
233                 return 0;
234         }
235         catch(exception& e) {
236                 m->errorOut(e, "Consensus", "getSets");
237                 exit(1);
238         }
239 }
240 //**********************************************************************************************************************
241 vector<string> Consensus::getSmallest(map< vector<string>, int> nodes) {
242         try{
243                 vector<string> smallest = nodes.begin()->first;
244                 int smallsize = smallest.size();
245                 
246                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
247                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
248                 }
249                 
250                 return smallest;
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "Consensus", "getSmallest");
254                 exit(1);
255         }
256 }
257
258 //**********************************************************************************************************************
259 vector<string> Consensus::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
260         try {
261 //cout << "new call " << endl << endl << endl;
262                 vector<string> largest; largest.clear();
263                 rest.clear();
264                 
265                 //if you are just 2 groups
266                 if (bigset.size() == 2)  {   
267                         rest.push_back(bigset[0]);
268                         largest.push_back(bigset[1]);
269                 }else{
270                         rest = bestSplit[bigset][0];
271                         largest = bestSplit[bigset][1];
272                 }
273                 
274                 
275                 //save for printing out later and for branch lengths
276                 nodePairsInTree[rest] = nodePairs[rest];
277                 
278                 //delete whatever set you return because it is no longer available
279                 nodePairs.erase(rest);
280
281                 //save for printing out later and for branch lengths
282                 nodePairsInTree[largest] = nodePairs[largest];
283                 
284                 //delete whatever set you return because it is no longer available
285                 nodePairs.erase(largest);
286                 
287                 return largest;
288         
289         }
290         catch(exception& e) {
291                 m->errorOut(e, "Consensus", "getNextAvailableSet");
292                 exit(1);
293         }
294 }
295
296 /**********************************************************************************************************************/
297 int Consensus::getSubgroupRating(vector<string> group) {
298         try {
299                 map< vector<string>, int>::iterator ittemp;
300                 map< vector< vector<string> > , int >::iterator it3;
301                 int rate = 0;
302                 
303                 // ***********************************************************************************//
304                 //1. this function must be called passing it littlest sets to biggest 
305                 //              since it the rating is made from your sighting plus you best splits rating
306                 //2. it saves the top pair to use later
307                 // ***********************************************************************************//
308
309                 
310                 if (group.size() < 3) {  return rate;  }
311                 
312                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
313                 
314                 //go through the sets
315                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
316                         //are you a subset of bigset, then save in possiblePairings
317                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
318                 }               
319         
320                 map< vector< vector<string> > , int > rating;
321                 
322                 while (possiblePairing.size() != 0) {
323                 
324                         it2 = possiblePairing.begin(); 
325                         vector<string> temprest = getRestSet(group, it2->first);
326                         
327                         //is the rest a set available in possiblePairings
328                         ittemp = possiblePairing.find(temprest);
329                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
330                                 vector< vector<string> > temprate;
331                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
332                                 
333                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
334                                 
335                                 //erase so you dont add 1,2 and 2,1.
336                                 possiblePairing.erase(temprest);
337                         }
338                         
339                         possiblePairing.erase(it2);
340                 }
341
342
343                 it3 = rating.begin();
344                 rate = it3->second;
345                 vector< vector<string> > topPair = it3->first;
346                 
347                 //choose the split with the best rating
348                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
349                         
350                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
351                 }
352                 
353                 bestSplit[group] = topPair;
354                 
355                 return rate;
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "Consensus", "getSubgroupRating");
359                 exit(1);
360         }
361 }
362
363 //**********************************************************************************************************************
364 vector<string> Consensus::getRestSet(vector<string> bigset, vector<string> subset) {
365         try {
366                 vector<string> rest;
367                 
368                 for (int i = 0; i < bigset.size(); i++) {
369                         bool inSubset = false;
370                         for (int j = 0; j < subset.size(); j++) {
371                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
372                         }
373                         
374                         //its not in the subset so put it in the rest
375                         if (inSubset == false) { rest.push_back(bigset[i]); }
376                 }
377
378                 return rest;
379         
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "Consensus", "getRestSet");
383                 exit(1);
384         }
385 }
386
387 //**********************************************************************************************************************
388 bool Consensus::isSubset(vector<string> bigset, vector<string> subset) {
389         try {
390                 
391         
392                 if (subset.size() > bigset.size()) { return false;  }
393                 
394                 //check if each guy in suset is also in bigset
395                 for (int i = 0; i < subset.size(); i++) {
396                         bool match = false;
397                         for (int j = 0; j < bigset.size(); j++) {
398                                 if (subset[i] == bigset[j]) { match = true; break; }
399                         }
400                         
401                         //you have a guy in subset that had no match in bigset
402                         if (match == false) { return false; }
403                 }
404                 
405                 return true;
406         
407         }
408         catch(exception& e) {
409                 m->errorOut(e, "Consensus", "isSubset");
410                 exit(1);
411         }
412 }
413 //**********************************************************************************************************************
414 int Consensus::findSpot(string node) {
415         try {
416                 int spot;
417                 
418                 //check if each guy in suset is also in bigset
419                 for (int i = 0; i < treeSet.size(); i++) {
420                         if (treeSet[i] == node) { spot = i; break; }
421                 }
422                 
423                 return spot;
424         
425         }
426         catch(exception& e) {
427                 m->errorOut(e, "Consensus", "findSpot");
428                 exit(1);
429         }
430 }
431 //**********************************************************************************************************************
432
433
434
435