]> git.donarmstrong.com Git - mothur.git/blob - consensus.cpp
changes while testing
[mothur.git] / consensus.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensus.h"
11
12 //**********************************************************************************************************************
13 Tree* Consensus::getTree(vector<Tree*>& t){
14         try {
15                 numNodes = t[0]->getNumNodes();
16                 numLeaves = t[0]->getNumLeaves();
17         numTrees = t.size();
18                 
19                 //get the possible pairings
20                 getSets(t);     
21                 
22                 if (m->control_pressed) { return 0; }
23                 
24                 consensusTree = new Tree(t[0]->getCountTable());
25                 
26                 it2 = nodePairs.find(treeSet);
27                 
28                 nodePairsInTree[treeSet] = it2->second; 
29                 
30                 //erase treeset because you are adding it
31                 nodePairs.erase(treeSet);
32                 
33                 //set count to numLeaves;
34                 count = numLeaves;
35                 
36                 buildConsensusTree(treeSet);
37                 
38                 if (m->control_pressed) {  delete consensusTree; return 0; }
39                 
40                 consensusTree->assembleTree();
41                 
42                 if (m->control_pressed) {  delete consensusTree; return 0; }
43                                 
44                 return consensusTree; 
45                 
46                 return 0;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "Consensus", "execute");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 int Consensus::printSetsInfo() {
55         try {
56         //open file for pairing not included in the tree
57                 string notIncluded = "cons.pairs";  
58                 ofstream out2;
59         m->openOutputFile(notIncluded, out2);
60
61         //output species in order
62                 out2 << "Species in Order: " << endl << endl;
63                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
64                 
65                 //output sets included
66                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
67                 
68                 if (m->control_pressed) {  return 0; }
69                 
70                 vector<string> temp;
71                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
72             
73                         if (m->control_pressed) {  return 0; }
74                         
75                         //only output pairs not leaves
76                         if (it2->first.size() > 1) { 
77                                 temp.clear();
78                                 //initialize temp to all "."
79                                 temp.resize(treeSet.size(), ".");
80                                 
81                                 //set the spot in temp that represents it2->first[i] to a "*" 
82                                 for (int i = 0; i < it2->first.size(); i++) {
83                                         //find spot 
84                                         int index = findSpot(it2->first[i]);
85                                         temp[index] = "*";
86                                         //temp[index] = it2->first[i] + "  ";
87                                 }
88                                 
89                                 //output temp
90                                 for (int j = 0; j < temp.size(); j++) { 
91                                         out2 << temp[j];
92                                 }
93                                 out2 << '\t' << it2->second << endl;
94                         }
95                 }
96                 
97                 //output sets not included
98                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
99                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
100             
101                         if (m->control_pressed) { return 0; }
102                         
103                         temp.clear();
104                         //initialize temp to all "."
105                         temp.resize(treeSet.size(), ".");
106             
107                         //set the spot in temp that represents it2->first[i] to a "*" 
108                         for (int i = 0; i < it2->first.size(); i++) {
109                                 //find spot 
110                                 int index = findSpot(it2->first[i]);
111                                 temp[index] = "*";
112                         }
113             
114                         //output temp
115                         for (int j = 0; j < temp.size(); j++) { 
116                                 out2 << temp[j];
117                         }
118                         out2 << '\t' << it2->second << endl;
119                 }
120         
121         return 0;
122     }
123         catch(exception& e) {
124                 m->errorOut(e, "Consensus", "printSetsInfo");
125                 exit(1);
126         }
127 }      
128 //**********************************************************************************************************************
129 int Consensus::buildConsensusTree(vector<string> nodeSet) {
130         try {
131                 vector<string> leftChildSet;
132                 vector<string> rightChildSet;
133                 
134                 if (m->control_pressed) { return 1; }
135                 
136                 //if you are at a leaf
137                 if (nodeSet.size() == 1) {
138                         //return the vector index of the leaf you are at
139                         return consensusTree->getIndex(nodeSet[0]);
140                 //terminate recursion
141                 }else if (count == numNodes) { return 0; }
142                 else {
143                         //finds best child pair
144                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
145                         int left = buildConsensusTree(leftChildSet);
146                         int right = buildConsensusTree(rightChildSet);
147                         consensusTree->tree[count].setChildren(left, right);
148                         consensusTree->tree[count].setLabel((nodePairsInTree[nodeSet]/(float)numTrees)); 
149                         consensusTree->tree[left].setParent(count);
150                         consensusTree->tree[right].setParent(count);
151                         count++;
152                         return (count-1);
153                 }
154         
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "Consensus", "buildConcensusTree");
158                 exit(1);
159         }
160 }
161
162 //**********************************************************************************************************************
163 int Consensus::getSets(vector<Tree*>& t) {
164         try {
165                 vector<string> temp;
166                 treeSet.clear();
167                 
168                 //for each tree add the possible pairs you find
169                 for (int i = 0; i < t.size(); i++) {
170                         
171                         //for each non-leaf node get descendant info.
172                         for (int j = numLeaves; j < numNodes; j++) {
173                                 
174                                 if (m->control_pressed) { return 1; }
175                                 
176                                 temp.clear();
177                                 //go through pcounts and pull out descendants
178                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
179                                         temp.push_back(it->first);
180                                 }
181                                 
182                                 //sort temp
183                                 sort(temp.begin(), temp.end());
184                                 
185                                 it2 = nodePairs.find(temp);
186                                 if (it2 != nodePairs.end()) {
187                                         nodePairs[temp]++;
188                                 }else{
189                                         nodePairs[temp] = 1;
190                                 }
191                         }
192                 }
193                 
194                 
195                 //add each leaf to terminate recursion in consensus
196                 //you want the leaves in there but with insignifigant sightings value so it is added last
197                 //for each leaf node get descendant info.
198                 for (int j = 0; j < numLeaves; j++) {
199                 
200                         if (m->control_pressed) { return 1; }
201             
202                         //only need the first one since leaves have no descendants but themselves
203                         it = t[0]->tree[j].pcount.begin(); 
204                         temp.clear();  temp.push_back(it->first);
205                         
206                         //fill treeSet
207                         treeSet.push_back(it->first);
208                         
209                         //add leaf to list but with sighting value less then all non leaf pairs 
210                         nodePairs[temp] = 0;
211                 }
212
213                 sort(treeSet.begin(), treeSet.end());
214                 
215                 
216                 map< vector<string>, int> nodePairsCopy = nodePairs;
217                 
218                 
219                 //set initial rating on pairs to sightings + subgroup sightings
220                 while (nodePairsCopy.size() != 0) {
221                         if (m->control_pressed) { return 1; }
222                 
223                         vector<string> smallOne = getSmallest(nodePairsCopy);
224                         
225                         int subgrouprate = getSubgroupRating(smallOne);
226                 
227                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
228                         
229                         nodePairsCopy.erase(smallOne);
230                 }
231                 
232                 return 0;
233         }
234         catch(exception& e) {
235                 m->errorOut(e, "Consensus", "getSets");
236                 exit(1);
237         }
238 }
239 //**********************************************************************************************************************
240 vector<string> Consensus::getSmallest(map< vector<string>, int> nodes) {
241         try{
242                 vector<string> smallest = nodes.begin()->first;
243                 int smallsize = smallest.size();
244                 
245                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
246                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
247                 }
248                 
249                 return smallest;
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "Consensus", "getSmallest");
253                 exit(1);
254         }
255 }
256
257 //**********************************************************************************************************************
258 vector<string> Consensus::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
259         try {
260 //cout << "new call " << endl << endl << endl;
261                 vector<string> largest; largest.clear();
262                 rest.clear();
263                 
264                 //if you are just 2 groups
265                 if (bigset.size() == 2)  {   
266                         rest.push_back(bigset[0]);
267                         largest.push_back(bigset[1]);
268                 }else{
269                         rest = bestSplit[bigset][0];
270                         largest = bestSplit[bigset][1];
271                 }
272                 
273                 
274                 //save for printing out later and for branch lengths
275                 nodePairsInTree[rest] = nodePairs[rest];
276                 
277                 //delete whatever set you return because it is no longer available
278                 nodePairs.erase(rest);
279
280                 //save for printing out later and for branch lengths
281                 nodePairsInTree[largest] = nodePairs[largest];
282                 
283                 //delete whatever set you return because it is no longer available
284                 nodePairs.erase(largest);
285                 
286                 return largest;
287         
288         }
289         catch(exception& e) {
290                 m->errorOut(e, "Consensus", "getNextAvailableSet");
291                 exit(1);
292         }
293 }
294
295 /**********************************************************************************************************************/
296 int Consensus::getSubgroupRating(vector<string> group) {
297         try {
298                 map< vector<string>, int>::iterator ittemp;
299                 map< vector< vector<string> > , int >::iterator it3;
300                 int rate = 0;
301                 
302                 // ***********************************************************************************//
303                 //1. this function must be called passing it littlest sets to biggest 
304                 //              since it the rating is made from your sighting plus you best splits rating
305                 //2. it saves the top pair to use later
306                 // ***********************************************************************************//
307
308                 
309                 if (group.size() < 3) {  return rate;  }
310                 
311                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
312                 
313                 //go through the sets
314                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
315                         //are you a subset of bigset, then save in possiblePairings
316                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
317                 }               
318         
319                 map< vector< vector<string> > , int > rating;
320                 
321                 while (possiblePairing.size() != 0) {
322                 
323                         it2 = possiblePairing.begin(); 
324                         vector<string> temprest = getRestSet(group, it2->first);
325                         
326                         //is the rest a set available in possiblePairings
327                         ittemp = possiblePairing.find(temprest);
328                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
329                                 vector< vector<string> > temprate;
330                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
331                                 
332                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
333                                 
334                                 //erase so you dont add 1,2 and 2,1.
335                                 possiblePairing.erase(temprest);
336                         }
337                         
338                         possiblePairing.erase(it2);
339                 }
340
341
342                 it3 = rating.begin();
343                 rate = it3->second;
344                 vector< vector<string> > topPair = it3->first;
345                 
346                 //choose the split with the best rating
347                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
348                         
349                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
350                 }
351                 
352                 bestSplit[group] = topPair;
353                 
354                 return rate;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "Consensus", "getSubgroupRating");
358                 exit(1);
359         }
360 }
361
362 //**********************************************************************************************************************
363 vector<string> Consensus::getRestSet(vector<string> bigset, vector<string> subset) {
364         try {
365                 vector<string> rest;
366                 
367                 for (int i = 0; i < bigset.size(); i++) {
368                         bool inSubset = false;
369                         for (int j = 0; j < subset.size(); j++) {
370                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
371                         }
372                         
373                         //its not in the subset so put it in the rest
374                         if (inSubset == false) { rest.push_back(bigset[i]); }
375                 }
376
377                 return rest;
378         
379         }
380         catch(exception& e) {
381                 m->errorOut(e, "Consensus", "getRestSet");
382                 exit(1);
383         }
384 }
385
386 //**********************************************************************************************************************
387 bool Consensus::isSubset(vector<string> bigset, vector<string> subset) {
388         try {
389                 
390         
391                 if (subset.size() > bigset.size()) { return false;  }
392                 
393                 //check if each guy in suset is also in bigset
394                 for (int i = 0; i < subset.size(); i++) {
395                         bool match = false;
396                         for (int j = 0; j < bigset.size(); j++) {
397                                 if (subset[i] == bigset[j]) { match = true; break; }
398                         }
399                         
400                         //you have a guy in subset that had no match in bigset
401                         if (match == false) { return false; }
402                 }
403                 
404                 return true;
405         
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "Consensus", "isSubset");
409                 exit(1);
410         }
411 }
412 //**********************************************************************************************************************
413 int Consensus::findSpot(string node) {
414         try {
415                 int spot;
416                 
417                 //check if each guy in suset is also in bigset
418                 for (int i = 0; i < treeSet.size(); i++) {
419                         if (treeSet[i] == node) { spot = i; break; }
420                 }
421                 
422                 return spot;
423         
424         }
425         catch(exception& e) {
426                 m->errorOut(e, "Consensus", "findSpot");
427                 exit(1);
428         }
429 }
430 //**********************************************************************************************************************
431
432
433
434