]> git.donarmstrong.com Git - mothur.git/blob - concensuscommand.cpp
fixed concensus command and modified tree class so you can print labels on trees
[mothur.git] / concensuscommand.cpp
1 /*
2  *  concensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "concensuscommand.h"
11
12 //**********************************************************************************************************************
13
14 ConcensusCommand::ConcensusCommand(){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 t = globaldata->gTree;
18         }
19         catch(exception& e) {
20                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
21                 exit(1);
22         }
23         catch(...) {
24                 cout << "An unknown error has occurred in the ConcensusCommand class function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
25                 exit(1);
26         }       
27 }
28 //**********************************************************************************************************************
29
30 ConcensusCommand::~ConcensusCommand(){}
31
32 //**********************************************************************************************************************
33
34 int ConcensusCommand::execute(){
35         try {
36                 
37                 if (t.size() == 0) { return 0; }
38                 else {  
39                         numNodes = t[0]->getNumNodes();
40                         numLeaves = t[0]->getNumLeaves();
41                 }
42                 
43                 //get the possible pairings
44                 getSets();              
45                 
46                 //print out pairings for testing
47                 /*cout << "possible pairing " <<  endl;
48                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
49                         for (int i = 0; i < it2->first.size(); i++) {
50                                 cout << it2->first[i] << " ";
51                         }
52                         cout << '\t' << it2->second << endl;
53                 }*/
54                 
55                 
56                 //open file for pairing not included in the tree
57                 notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs";
58                 openOutputFile(notIncluded, out2);
59                 
60                 concensusTree = new Tree();
61                 
62                 it2 = nodePairs.find(treeSet);
63                 
64                 nodePairsInTree[treeSet] = it2->second; 
65                 
66                 //erase treeset because you are adding it
67                 nodePairs.erase(treeSet);
68                 
69                 //set count to numLeaves;
70                 count = numLeaves;
71                 
72                 buildConcensusTree(treeSet);
73                 
74                 concensusTree->assembleTree();
75                 
76                 //output species in order
77                 out2 << "Species in Order: " << endl << endl;
78                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
79                 
80                 vector<string> temp; 
81                 //output sets included
82                 out2 << endl << "Sets included in the concensus tree:" << endl << endl;
83                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
84                         //only output pairs not leaves
85                         if (it2->first.size() > 1) { 
86                                 temp.clear();
87                                 //initialize temp to all "."
88                                 temp.resize(treeSet.size(), ".");
89                                 
90                                 //set the spot in temp that represents it2->first[i] to a "*" 
91                                 for (int i = 0; i < it2->first.size(); i++) {
92                                         //find spot 
93                                         int index = findSpot(it2->first[i]);
94                                         temp[index] = "*";
95                                 }
96                                 
97                                 //output temp
98                                 for (int j = 0; j < temp.size(); j++) { 
99                                         out2 << temp[j];
100                                 }
101                                 out2 << '\t' << it2->second << endl;
102                         }
103                 }
104                 
105                 //output sets not included
106                 out2 << endl << "Sets NOT included in the concensus tree:" << endl << endl;
107                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
108                         temp.clear();
109                         //initialize temp to all "."
110                         temp.resize(treeSet.size(), ".");
111                                 
112                         //set the spot in temp that represents it2->first[i] to a "*" 
113                         for (int i = 0; i < it2->first.size(); i++) {
114                                 //find spot 
115                                 int index = findSpot(it2->first[i]);
116                                 temp[index] = "*";
117                         }
118                                 
119                         //output temp
120                         for (int j = 0; j < temp.size(); j++) { 
121                                 out2 << temp[j];
122                         }
123                         out2 << '\t' << it2->second << endl;
124                 }
125                 
126                 outputFile = getRootName(globaldata->inputFileName) + "concensus.tre";
127                 openOutputFile(outputFile, out);
128                 
129                 concensusTree->printForBoot(out);
130                 
131                 out.close(); out2.close();
132                 
133                 delete concensusTree; 
134                 
135                 return 0;
136         }
137         catch(exception& e) {
138                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
139                 exit(1);
140         }
141         catch(...) {
142                 cout << "An unknown error has occurred in the ConcensusCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
143                 exit(1);
144         }               
145 }
146
147 //**********************************************************************************************************************
148 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
149         try {
150                 vector<string> leftChildSet;
151                 vector<string> rightChildSet;
152                 
153                 //if you are at a leaf
154                 if (nodeSet.size() == 1) {
155                         //return the vector index of the leaf you are at
156                         return concensusTree->getIndex(nodeSet[0]);
157                 //terminate recursion
158                 }else if (count == numNodes) { return 0; }
159                 else {
160                         leftChildSet = getNextAvailableSet(nodeSet);
161                         rightChildSet = getRestSet(nodeSet, leftChildSet);
162                         int left = buildConcensusTree(leftChildSet);
163                         int right = buildConcensusTree(rightChildSet);
164                         concensusTree->tree[count].setChildren(left, right);
165                         concensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
166                         concensusTree->tree[left].setParent(count);
167                         concensusTree->tree[right].setParent(count);
168                         count++;
169                         return (count-1);
170                 }
171         
172         }
173         catch(exception& e) {
174                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177         catch(...) {
178                 cout << "An unknown error has occurred in the ConcensusCommand class function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }               
181 }
182
183 //**********************************************************************************************************************
184 void ConcensusCommand::getSets() {
185         try {
186                 vector<string> temp;
187                 treeSet.clear();
188                 
189                 //for each tree add the possible pairs you find
190                 for (int i = 0; i < t.size(); i++) {
191                         
192                         //for each non-leaf node get descendant info.
193                         for (int j = numLeaves; j < numNodes; j++) {
194                                 temp.clear();
195                                 //go through pcounts and pull out descendants
196                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
197                                         temp.push_back(it->first);
198                                 }
199                                 
200                                 //sort temp
201                                 sort(temp.begin(), temp.end());
202                                 
203                                 it2 = nodePairs.find(temp);
204                                 if (it2 != nodePairs.end()) {
205                                         nodePairs[temp]++;
206                                 }else{
207                                         nodePairs[temp] = 1;
208                                 }
209                         }
210                 }
211                 
212                 //add each leaf to terminate recursion in concensus
213                 //you want the leaves in there but with insignifigant sightings value so it is added last
214                 //for each leaf node get descendant info.
215                 for (int j = 0; j < numLeaves; j++) {
216                         
217                         //only need the first one since leaves have no descendants but themselves
218                         it = t[0]->tree[j].pcount.begin(); 
219                         temp.clear();  temp.push_back(it->first);
220                         
221                         //fill treeSet
222                         treeSet.push_back(it->first);
223                         
224                         //add leaf to list but with sighting value less then all non leaf pairs 
225                         nodePairs[temp] = 0;
226                 }
227
228                 sort(treeSet.begin(), treeSet.end());
229         }
230         catch(exception& e) {
231                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
232                 exit(1);
233         }
234         catch(...) {
235                 cout << "An unknown error has occurred in the ConcensusCommand class function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
236                 exit(1);
237         }               
238 }
239
240 //**********************************************************************************************************************
241 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
242         try {
243                 vector<string> largest; largest.clear();
244                 int largestSighting = -1;
245                 
246                 //go through the sets
247                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
248                         //are you a subset of bigset
249                         if (isSubset(bigset, it2->first) == true) {
250                         
251                                 //are you the largest. if you are the same size as current largest refer to sighting
252                                 if (it2->first.size() > largest.size()) { largest = it2->first;  largestSighting = it2->second; }
253                                 else if (it2->first.size() == largest.size()) {
254                                         if (it2->second > largestSighting) { largest = it2->first;  largestSighting = it2->second; }
255                                 }
256                                 
257                         }
258                 }
259                 
260                 //save for printing out later and for branch lengths
261                 nodePairsInTree[largest] = nodePairs[largest];
262                 
263                 //delete whatever set you return because it is no longer available
264                 nodePairs.erase(largest);
265                 
266                 return largest;
267         
268         }
269         catch(exception& e) {
270                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271                 exit(1);
272         }
273         catch(...) {
274                 cout << "An unknown error has occurred in the ConcensusCommand class function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
275                 exit(1);
276         }               
277
278 }
279
280 //**********************************************************************************************************************
281 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
282         try {
283                 vector<string> rest;
284                 
285                 for (int i = 0; i < bigset.size(); i++) {
286                         bool inSubset = false;
287                         for (int j = 0; j < subset.size(); j++) {
288                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
289                         }
290                         
291                         //its not in the subset so put it in the rest
292                         if (inSubset == false) { rest.push_back(bigset[i]); }
293                 }
294                 
295                 //save for printing out later and for branch lengths
296                 nodePairsInTree[rest] = nodePairs[rest];
297                 
298                 //delete whatever set you return because it is no longer available
299                 nodePairs.erase(rest);
300
301                 return rest;
302         
303         }
304         catch(exception& e) {
305                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }
308         catch(...) {
309                 cout << "An unknown error has occurred in the ConcensusCommand class function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
310                 exit(1);
311         }               
312
313 }
314
315 //**********************************************************************************************************************
316 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
317         try {
318                 
319                 //check if each guy in suset is also in bigset
320                 for (int i = 0; i < subset.size(); i++) {
321                         bool match = false;
322                         for (int j = 0; j < bigset.size(); j++) {
323                                 if (subset[i] == bigset[j]) { match = true; break; }
324                         }
325                         
326                         //you have a guy in subset that had no match in bigset
327                         if (match == false) { return false; }
328                 }
329                 
330                 return true;
331         
332         }
333         catch(exception& e) {
334                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
335                 exit(1);
336         }
337         catch(...) {
338                 cout << "An unknown error has occurred in the ConcensusCommand class function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
339                 exit(1);
340         }               
341 }
342 //**********************************************************************************************************************
343 int ConcensusCommand::findSpot(string node) {
344         try {
345                 int spot;
346                 
347                 //check if each guy in suset is also in bigset
348                 for (int i = 0; i < treeSet.size(); i++) {
349                         if (treeSet[i] == node) { spot = i; break; }
350                 }
351                 
352                 return spot;
353         
354         }
355         catch(exception& e) {
356                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
357                 exit(1);
358         }
359         catch(...) {
360                 cout << "An unknown error has occurred in the ConcensusCommand class function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
361                 exit(1);
362         }               
363 }
364 //**********************************************************************************************************************
365
366
367
368