]> git.donarmstrong.com Git - mothur.git/blob - concensuscommand.cpp
09e0f324268332e0f691d42b00f003aab9f9c9c9
[mothur.git] / concensuscommand.cpp
1 /*
2  *  concensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "concensuscommand.h"
11
12 //**********************************************************************************************************************
13
14 ConcensusCommand::ConcensusCommand(string option){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         if (option != "") { cout << "There are no valid parameters for the concensus command." << endl; abort = true; }
24                         
25                         //no trees were read
26                         if (globaldata->gTree.size() == 0) {    cout << "You must execute the read.tree command, before you may use the concensus command." << endl; abort = true;  }
27                         else {  t = globaldata->gTree;  }
28                 }
29         }
30         catch(exception& e) {
31                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
32                 exit(1);
33         }
34         catch(...) {
35                 cout << "An unknown error has occurred in the ConcensusCommand class function ConcensusCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
36                 exit(1);
37         }       
38 }
39
40 //**********************************************************************************************************************
41
42 void ConcensusCommand::help(){
43         try {
44                 cout << "The concensus command can only be executed after a successful read.tree command." << "\n";
45                 cout << "The concensus command has no parameters." << "\n";
46                 cout << "The concensus command should be in the following format: concensus()." << "\n";
47                 cout << "The concensus command output two files: .concensus.tre and .concensuspairs." << "\n";
48                 cout << "The .concensus.tre file contains the concensus tree of the trees in your input file." << "\n";
49                 cout << "The branch lengths are the percentage of trees in your input file that had the given pair." << "\n";
50                 cout << "The .concensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the concensus tree " << "\n";
51                 cout << "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages." << "\n" << "\n";         
52         }
53         catch(exception& e) {
54                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
55                 exit(1);
56         }
57         catch(...) {
58                 cout << "An unknown error has occurred in the ConcensusCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
59                 exit(1);
60         }       
61 }
62
63 //**********************************************************************************************************************
64
65 ConcensusCommand::~ConcensusCommand(){}
66
67 //**********************************************************************************************************************
68
69 int ConcensusCommand::execute(){
70         try {
71                 
72                 if (abort == true) { return 0; }
73                 else {  
74                         numNodes = t[0]->getNumNodes();
75                         numLeaves = t[0]->getNumLeaves();
76                 }
77                 
78                 //get the possible pairings
79                 getSets();              
80                 
81                 //print out pairings for testing
82                 /*cout << "possible pairing " <<  endl;
83                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
84                         for (int i = 0; i < it2->first.size(); i++) {
85                                 cout << it2->first[i] << " ";
86                         }
87                         cout << '\t' << it2->second << endl;
88                 }*/
89                 
90                 
91                 //open file for pairing not included in the tree
92                 notIncluded = getRootName(globaldata->inputFileName) + "concensuspairs";
93                 openOutputFile(notIncluded, out2);
94                 
95                 concensusTree = new Tree();
96                 
97                 it2 = nodePairs.find(treeSet);
98                 
99                 nodePairsInTree[treeSet] = it2->second; 
100                 
101                 //erase treeset because you are adding it
102                 nodePairs.erase(treeSet);
103                 
104                 //set count to numLeaves;
105                 count = numLeaves;
106                 
107                 buildConcensusTree(treeSet);
108                 
109                 concensusTree->assembleTree();
110                 
111                 //output species in order
112                 out2 << "Species in Order: " << endl << endl;
113                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
114                 
115                 vector<string> temp; 
116                 //output sets included
117                 out2 << endl << "Sets included in the concensus tree:" << endl << endl;
118                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
119                         //only output pairs not leaves
120                         if (it2->first.size() > 1) { 
121                                 temp.clear();
122                                 //initialize temp to all "."
123                                 temp.resize(treeSet.size(), ".");
124                                 
125                                 //set the spot in temp that represents it2->first[i] to a "*" 
126                                 for (int i = 0; i < it2->first.size(); i++) {
127                                         //find spot 
128                                         int index = findSpot(it2->first[i]);
129                                         temp[index] = "*";
130                                 }
131                                 
132                                 //output temp
133                                 for (int j = 0; j < temp.size(); j++) { 
134                                         out2 << temp[j];
135                                 }
136                                 out2 << '\t' << it2->second << endl;
137                         }
138                 }
139                 
140                 //output sets not included
141                 out2 << endl << "Sets NOT included in the concensus tree:" << endl << endl;
142                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
143                         temp.clear();
144                         //initialize temp to all "."
145                         temp.resize(treeSet.size(), ".");
146                                 
147                         //set the spot in temp that represents it2->first[i] to a "*" 
148                         for (int i = 0; i < it2->first.size(); i++) {
149                                 //find spot 
150                                 int index = findSpot(it2->first[i]);
151                                 temp[index] = "*";
152                         }
153                                 
154                         //output temp
155                         for (int j = 0; j < temp.size(); j++) { 
156                                 out2 << temp[j];
157                         }
158                         out2 << '\t' << it2->second << endl;
159                 }
160                 
161                 outputFile = getRootName(globaldata->inputFileName) + "concensus.tre";
162                 openOutputFile(outputFile, out);
163                 
164                 concensusTree->printForBoot(out);
165                 
166                 out.close(); out2.close();
167                 
168                 delete concensusTree; 
169                 
170                 return 0;
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the ConcensusCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }               
180 }
181
182 //**********************************************************************************************************************
183 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
184         try {
185                 vector<string> leftChildSet;
186                 vector<string> rightChildSet;
187                 
188                 //if you are at a leaf
189                 if (nodeSet.size() == 1) {
190                         //return the vector index of the leaf you are at
191                         return concensusTree->getIndex(nodeSet[0]);
192                 //terminate recursion
193                 }else if (count == numNodes) { return 0; }
194                 else {
195                         leftChildSet = getNextAvailableSet(nodeSet);
196                         rightChildSet = getRestSet(nodeSet, leftChildSet);
197                         int left = buildConcensusTree(leftChildSet);
198                         int right = buildConcensusTree(rightChildSet);
199                         concensusTree->tree[count].setChildren(left, right);
200                         concensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
201                         concensusTree->tree[left].setParent(count);
202                         concensusTree->tree[right].setParent(count);
203                         count++;
204                         return (count-1);
205                 }
206         
207         }
208         catch(exception& e) {
209                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
210                 exit(1);
211         }
212         catch(...) {
213                 cout << "An unknown error has occurred in the ConcensusCommand class function buildConcensusTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }               
216 }
217
218 //**********************************************************************************************************************
219 void ConcensusCommand::getSets() {
220         try {
221                 vector<string> temp;
222                 treeSet.clear();
223                 
224                 //for each tree add the possible pairs you find
225                 for (int i = 0; i < t.size(); i++) {
226                         
227                         //for each non-leaf node get descendant info.
228                         for (int j = numLeaves; j < numNodes; j++) {
229                                 temp.clear();
230                                 //go through pcounts and pull out descendants
231                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
232                                         temp.push_back(it->first);
233                                 }
234                                 
235                                 //sort temp
236                                 sort(temp.begin(), temp.end());
237                                 
238                                 it2 = nodePairs.find(temp);
239                                 if (it2 != nodePairs.end()) {
240                                         nodePairs[temp]++;
241                                 }else{
242                                         nodePairs[temp] = 1;
243                                 }
244                         }
245                 }
246                 
247                 //add each leaf to terminate recursion in concensus
248                 //you want the leaves in there but with insignifigant sightings value so it is added last
249                 //for each leaf node get descendant info.
250                 for (int j = 0; j < numLeaves; j++) {
251                         
252                         //only need the first one since leaves have no descendants but themselves
253                         it = t[0]->tree[j].pcount.begin(); 
254                         temp.clear();  temp.push_back(it->first);
255                         
256                         //fill treeSet
257                         treeSet.push_back(it->first);
258                         
259                         //add leaf to list but with sighting value less then all non leaf pairs 
260                         nodePairs[temp] = 0;
261                 }
262
263                 sort(treeSet.begin(), treeSet.end());
264         }
265         catch(exception& e) {
266                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }
269         catch(...) {
270                 cout << "An unknown error has occurred in the ConcensusCommand class function getSets. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271                 exit(1);
272         }               
273 }
274
275 //**********************************************************************************************************************
276 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
277         try {
278                 vector<string> largest; largest.clear();
279                 int largestSighting = -1;
280                 
281                 //go through the sets
282                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
283                         //are you a subset of bigset
284                         if (isSubset(bigset, it2->first) == true) {
285                         
286                                 //are you the largest. if you are the same size as current largest refer to sighting
287                                 if (it2->first.size() > largest.size()) { largest = it2->first;  largestSighting = it2->second; }
288                                 else if (it2->first.size() == largest.size()) {
289                                         if (it2->second > largestSighting) { largest = it2->first;  largestSighting = it2->second; }
290                                 }
291                                 
292                         }
293                 }
294                 
295                 //save for printing out later and for branch lengths
296                 nodePairsInTree[largest] = nodePairs[largest];
297                 
298                 //delete whatever set you return because it is no longer available
299                 nodePairs.erase(largest);
300                 
301                 return largest;
302         
303         }
304         catch(exception& e) {
305                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }
308         catch(...) {
309                 cout << "An unknown error has occurred in the ConcensusCommand class function getNextAvailableSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
310                 exit(1);
311         }               
312
313 }
314
315 //**********************************************************************************************************************
316 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
317         try {
318                 vector<string> rest;
319                 
320                 for (int i = 0; i < bigset.size(); i++) {
321                         bool inSubset = false;
322                         for (int j = 0; j < subset.size(); j++) {
323                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
324                         }
325                         
326                         //its not in the subset so put it in the rest
327                         if (inSubset == false) { rest.push_back(bigset[i]); }
328                 }
329                 
330                 //save for printing out later and for branch lengths
331                 nodePairsInTree[rest] = nodePairs[rest];
332                 
333                 //delete whatever set you return because it is no longer available
334                 nodePairs.erase(rest);
335
336                 return rest;
337         
338         }
339         catch(exception& e) {
340                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
341                 exit(1);
342         }
343         catch(...) {
344                 cout << "An unknown error has occurred in the ConcensusCommand class function getRestSet. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }               
347
348 }
349
350 //**********************************************************************************************************************
351 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
352         try {
353                 
354                 //check if each guy in suset is also in bigset
355                 for (int i = 0; i < subset.size(); i++) {
356                         bool match = false;
357                         for (int j = 0; j < bigset.size(); j++) {
358                                 if (subset[i] == bigset[j]) { match = true; break; }
359                         }
360                         
361                         //you have a guy in subset that had no match in bigset
362                         if (match == false) { return false; }
363                 }
364                 
365                 return true;
366         
367         }
368         catch(exception& e) {
369                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370                 exit(1);
371         }
372         catch(...) {
373                 cout << "An unknown error has occurred in the ConcensusCommand class function isSubset. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
374                 exit(1);
375         }               
376 }
377 //**********************************************************************************************************************
378 int ConcensusCommand::findSpot(string node) {
379         try {
380                 int spot;
381                 
382                 //check if each guy in suset is also in bigset
383                 for (int i = 0; i < treeSet.size(); i++) {
384                         if (treeSet[i] == node) { spot = i; break; }
385                 }
386                 
387                 return spot;
388         
389         }
390         catch(exception& e) {
391                 cout << "Standard Error: " << e.what() << " has occurred in the ConcensusCommand class Function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
392                 exit(1);
393         }
394         catch(...) {
395                 cout << "An unknown error has occurred in the ConcensusCommand class function findSpot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
396                 exit(1);
397         }               
398 }
399 //**********************************************************************************************************************
400
401
402
403