]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
fixes while testing
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::getValidParameters(){  
14         try {
15                 vector<string> myArray; 
16                 return myArray;
17         }
18         catch(exception& e) {
19                 m->errorOut(e, "ConcensusCommand", "getValidParameters");
20                 exit(1);
21         }
22 }
23 //**********************************************************************************************************************
24 vector<string> ConcensusCommand::getRequiredParameters(){       
25         try {
26                 vector<string> myArray; 
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "ConcensusCommand", "getRequiredParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 vector<string> ConcensusCommand::getRequiredFiles(){    
36         try {
37                 string AlignArray[] =  {"tree","group"};
38                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
39                 return myArray;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "ConcensusCommand", "getRequiredFiles");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 ConcensusCommand::ConcensusCommand(){   
48         try {
49                 abort = true;
50                 //initialize outputTypes
51                 vector<string> tempOutNames;
52                 outputTypes["tree"] = tempOutNames;
53                 outputTypes["nodepairs"] = tempOutNames;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61
62 ConcensusCommand::ConcensusCommand(string fileroot)  {
63         try {
64                 globaldata = GlobalData::getInstance();
65                 abort = false;
66                 
67                 //initialize outputTypes
68                 vector<string> tempOutNames;
69                 outputTypes["tree"] = tempOutNames;
70                 outputTypes["nodepairs"] = tempOutNames;
71                 
72                 filename = fileroot;
73                 
74                 t = globaldata->gTree;
75         
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
79                 exit(1);
80         }
81 }
82
83 //**********************************************************************************************************************
84
85 void ConcensusCommand::help(){
86         try {
87                 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
88                 m->mothurOut("The consensus command has no parameters.\n");
89                 m->mothurOut("The consensus command should be in the following format: consensus().\n");
90                 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
91                 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
92                 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
93                 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n");
94                 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");               
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "ConcensusCommand", "help");
98                 exit(1);
99         }
100 }
101
102 //**********************************************************************************************************************
103
104 ConcensusCommand::~ConcensusCommand(){}
105
106 //**********************************************************************************************************************
107
108 int ConcensusCommand::execute(){
109         try {
110                 
111                 if (abort == true) { return 0; }
112                 else {  
113                         numNodes = t[0]->getNumNodes();
114                         numLeaves = t[0]->getNumLeaves();
115                 }
116                 
117                 //get the possible pairings
118                 getSets();      
119                 
120                 if (m->control_pressed) { return 0; }
121                 
122                 //open file for pairing not included in the tree
123                 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded);  outputTypes["nodepairs"].push_back(notIncluded);
124                 m->openOutputFile(notIncluded, out2);
125                 
126                 consensusTree = new Tree();
127                 
128                 it2 = nodePairs.find(treeSet);
129                 
130                 nodePairsInTree[treeSet] = it2->second; 
131                 
132                 //erase treeset because you are adding it
133                 nodePairs.erase(treeSet);
134                 
135                 //set count to numLeaves;
136                 count = numLeaves;
137                 
138                 buildConcensusTree(treeSet);
139                 
140                 if (m->control_pressed) { delete consensusTree; return 0; }
141                 
142                 consensusTree->assembleTree();
143                 
144                 if (m->control_pressed) { delete consensusTree; return 0; }
145                 
146                 //output species in order
147                 out2 << "Species in Order: " << endl << endl;
148                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
149                 
150                 //output sets included
151                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
152                 
153                 if (m->control_pressed) { delete consensusTree; return 0; }
154                 
155                 vector<string> temp;
156                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
157                 
158                         if (m->control_pressed) { delete consensusTree; return 0; }
159                         
160                         //only output pairs not leaves
161                         if (it2->first.size() > 1) { 
162                                 temp.clear();
163                                 //initialize temp to all "."
164                                 temp.resize(treeSet.size(), ".");
165                                 
166                                 //set the spot in temp that represents it2->first[i] to a "*" 
167                                 for (int i = 0; i < it2->first.size(); i++) {
168                                         //find spot 
169                                         int index = findSpot(it2->first[i]);
170                                         temp[index] = "*";
171                                         //temp[index] = it2->first[i] + "  ";
172                                 }
173                                 
174                                 //output temp
175                                 for (int j = 0; j < temp.size(); j++) { 
176                                         out2 << temp[j];
177                                 }
178                                 out2 << '\t' << it2->second << endl;
179                         }
180                 }
181                 
182                 //output sets not included
183                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
184                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
185                 
186                         if (m->control_pressed) { delete consensusTree; return 0; }
187                         
188                         temp.clear();
189                         //initialize temp to all "."
190                         temp.resize(treeSet.size(), ".");
191                                 
192                         //set the spot in temp that represents it2->first[i] to a "*" 
193                         for (int i = 0; i < it2->first.size(); i++) {
194                                 //find spot 
195                                 int index = findSpot(it2->first[i]);
196                                 temp[index] = "*";
197                         }
198                                 
199                         //output temp
200                         for (int j = 0; j < temp.size(); j++) { 
201                                 out2 << temp[j];
202                         }
203                         out2 << '\t' << it2->second << endl;
204                 }
205                 
206                 outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
207                 m->openOutputFile(outputFile, out);
208                 
209                 consensusTree->printForBoot(out);
210                 
211                 out.close(); out2.close();
212                 
213                 delete consensusTree; 
214                 
215                 return 0;
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "ConcensusCommand", "execute");
219                 exit(1);
220         }
221 }
222
223 //**********************************************************************************************************************
224 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
225         try {
226                 vector<string> leftChildSet;
227                 vector<string> rightChildSet;
228                 
229                 if (m->control_pressed) { return 1; }
230                 
231                 //if you are at a leaf
232                 if (nodeSet.size() == 1) {
233                         //return the vector index of the leaf you are at
234                         return consensusTree->getIndex(nodeSet[0]);
235                 //terminate recursion
236                 }else if (count == numNodes) { return 0; }
237                 else {
238                         //finds best child pair
239                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
240                         int left = buildConcensusTree(leftChildSet);
241                         int right = buildConcensusTree(rightChildSet);
242                         consensusTree->tree[count].setChildren(left, right);
243                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
244                         consensusTree->tree[left].setParent(count);
245                         consensusTree->tree[right].setParent(count);
246                         count++;
247                         return (count-1);
248                 }
249         
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
253                 exit(1);
254         }
255 }
256
257 //**********************************************************************************************************************
258 int ConcensusCommand::getSets() {
259         try {
260                 vector<string> temp;
261                 treeSet.clear();
262                 
263                 //for each tree add the possible pairs you find
264                 for (int i = 0; i < t.size(); i++) {
265                         
266                         //for each non-leaf node get descendant info.
267                         for (int j = numLeaves; j < numNodes; j++) {
268                                 
269                                 if (m->control_pressed) { return 1; }
270                                 
271                                 temp.clear();
272                                 //go through pcounts and pull out descendants
273                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
274                                         temp.push_back(it->first);
275                                 }
276                                 
277                                 //sort temp
278                                 sort(temp.begin(), temp.end());
279                                 
280                                 it2 = nodePairs.find(temp);
281                                 if (it2 != nodePairs.end()) {
282                                         nodePairs[temp]++;
283                                 }else{
284                                         nodePairs[temp] = 1;
285                                 }
286                         }
287                 }
288                 
289                 
290                 //add each leaf to terminate recursion in consensus
291                 //you want the leaves in there but with insignifigant sightings value so it is added last
292                 //for each leaf node get descendant info.
293                 for (int j = 0; j < numLeaves; j++) {
294                 
295                         if (m->control_pressed) { return 1; }
296                         
297                         //only need the first one since leaves have no descendants but themselves
298                         it = t[0]->tree[j].pcount.begin(); 
299                         temp.clear();  temp.push_back(it->first);
300                         
301                         //fill treeSet
302                         treeSet.push_back(it->first);
303                         
304                         //add leaf to list but with sighting value less then all non leaf pairs 
305                         nodePairs[temp] = 0;
306                 }
307
308                 sort(treeSet.begin(), treeSet.end());
309                 
310                 
311                 map< vector<string>, int> nodePairsCopy = nodePairs;
312                 
313                 
314                 //set initial rating on pairs to sightings + subgroup sightings
315                 while (nodePairsCopy.size() != 0) {
316                         if (m->control_pressed) { return 1; }
317                 
318                         vector<string> smallOne = getSmallest(nodePairsCopy);
319                         
320                         int subgrouprate = getSubgroupRating(smallOne);
321                 
322                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
323                         
324                         nodePairsCopy.erase(smallOne);
325                 }
326                 
327                 return 0;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "ConcensusCommand", "getSets");
331                 exit(1);
332         }
333 }
334 //**********************************************************************************************************************
335 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
336         try{
337                 vector<string> smallest = nodes.begin()->first;
338                 int smallsize = smallest.size();
339                 
340                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
341                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
342                 }
343                 
344                 return smallest;
345         }
346         catch(exception& e) {
347                 m->errorOut(e, "ConcensusCommand", "getSmallest");
348                 exit(1);
349         }
350 }
351
352 //**********************************************************************************************************************
353 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
354         try {
355 //cout << "new call " << endl << endl << endl;
356                 vector<string> largest; largest.clear();
357                 rest.clear();
358                 
359                 //if you are just 2 groups
360                 if (bigset.size() == 2)  {   
361                         rest.push_back(bigset[0]);
362                         largest.push_back(bigset[1]);
363                 }else{
364                         rest = bestSplit[bigset][0];
365                         largest = bestSplit[bigset][1];
366                 }
367                 
368                 
369                 //save for printing out later and for branch lengths
370                 nodePairsInTree[rest] = nodePairs[rest];
371                 
372                 //delete whatever set you return because it is no longer available
373                 nodePairs.erase(rest);
374
375                 //save for printing out later and for branch lengths
376                 nodePairsInTree[largest] = nodePairs[largest];
377                 
378                 //delete whatever set you return because it is no longer available
379                 nodePairs.erase(largest);
380                 
381                 return largest;
382         
383         }
384         catch(exception& e) {
385                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
386                 exit(1);
387         }
388 }
389
390 /**********************************************************************************************************************/
391 int ConcensusCommand::getSubgroupRating(vector<string> group) {
392         try {
393                 map< vector<string>, int>::iterator ittemp;
394                 map< vector< vector<string> > , int >::iterator it3;
395                 int rate = 0;
396                 
397                 // ***********************************************************************************//
398                 //1. this function must be called passing it littlest sets to biggest 
399                 //              since it the rating is made from your sighting plus you best splits rating
400                 //2. it saves the top pair to use later
401                 // ***********************************************************************************//
402
403                 
404                 if (group.size() < 3) {  return rate;  }
405                 
406                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
407                 
408                 //go through the sets
409                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
410                         //are you a subset of bigset, then save in possiblePairings
411                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
412                 }               
413         
414                 map< vector< vector<string> > , int > rating;
415                 
416                 while (possiblePairing.size() != 0) {
417                 
418                         it2 = possiblePairing.begin(); 
419                         vector<string> temprest = getRestSet(group, it2->first);
420                         
421                         //is the rest a set available in possiblePairings
422                         ittemp = possiblePairing.find(temprest);
423                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
424                                 vector< vector<string> > temprate;
425                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
426                                 
427                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
428                                 
429                                 //erase so you dont add 1,2 and 2,1.
430                                 possiblePairing.erase(temprest);
431                         }
432                         
433                         possiblePairing.erase(it2);
434                 }
435
436
437                 it3 = rating.begin();
438                 rate = it3->second;
439                 vector< vector<string> > topPair = it3->first;
440                 
441                 //choose the split with the best rating
442                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
443                         
444                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
445                 }
446                 
447                 bestSplit[group] = topPair;
448                 
449                 return rate;
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
453                 exit(1);
454         }
455 }
456
457 //**********************************************************************************************************************
458 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
459         try {
460                 vector<string> rest;
461                 
462                 for (int i = 0; i < bigset.size(); i++) {
463                         bool inSubset = false;
464                         for (int j = 0; j < subset.size(); j++) {
465                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
466                         }
467                         
468                         //its not in the subset so put it in the rest
469                         if (inSubset == false) { rest.push_back(bigset[i]); }
470                 }
471
472                 return rest;
473         
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "ConcensusCommand", "getRestSet");
477                 exit(1);
478         }
479 }
480
481 //**********************************************************************************************************************
482 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
483         try {
484                 
485         
486                 if (subset.size() > bigset.size()) { return false;  }
487                 
488                 //check if each guy in suset is also in bigset
489                 for (int i = 0; i < subset.size(); i++) {
490                         bool match = false;
491                         for (int j = 0; j < bigset.size(); j++) {
492                                 if (subset[i] == bigset[j]) { match = true; break; }
493                         }
494                         
495                         //you have a guy in subset that had no match in bigset
496                         if (match == false) { return false; }
497                 }
498                 
499                 return true;
500         
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "ConcensusCommand", "isSubset");
504                 exit(1);
505         }
506 }
507 //**********************************************************************************************************************
508 int ConcensusCommand::findSpot(string node) {
509         try {
510                 int spot;
511                 
512                 //check if each guy in suset is also in bigset
513                 for (int i = 0; i < treeSet.size(); i++) {
514                         if (treeSet[i] == node) { spot = i; break; }
515                 }
516                 
517                 return spot;
518         
519         }
520         catch(exception& e) {
521                 m->errorOut(e, "ConcensusCommand", "findSpot");
522                 exit(1);
523         }
524 }
525 //**********************************************************************************************************************
526
527
528
529