]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::getValidParameters(){  
14         try {
15                 vector<string> myArray; 
16                 return myArray;
17         }
18         catch(exception& e) {
19                 m->errorOut(e, "ConcensusCommand", "getValidParameters");
20                 exit(1);
21         }
22 }
23 //**********************************************************************************************************************
24 vector<string> ConcensusCommand::getRequiredParameters(){       
25         try {
26                 vector<string> myArray; 
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "ConcensusCommand", "getRequiredParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 vector<string> ConcensusCommand::getRequiredFiles(){    
36         try {
37                 string AlignArray[] =  {"tree","group"};
38                 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
39                 return myArray;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "ConcensusCommand", "getRequiredFiles");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 ConcensusCommand::ConcensusCommand(){   
48         try {
49                 abort = true; calledHelp = true; 
50                 vector<string> tempOutNames;
51                 outputTypes["tree"] = tempOutNames;
52                 outputTypes["nodepairs"] = tempOutNames;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60
61 ConcensusCommand::ConcensusCommand(string fileroot)  {
62         try {
63                 globaldata = GlobalData::getInstance();
64                 abort = false; calledHelp = false;   
65                 
66                 //initialize outputTypes
67                 vector<string> tempOutNames;
68                 outputTypes["tree"] = tempOutNames;
69                 outputTypes["nodepairs"] = tempOutNames;
70                 
71                 filename = fileroot;
72                 
73                 t = globaldata->gTree;
74         
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
78                 exit(1);
79         }
80 }
81
82 //**********************************************************************************************************************
83
84 void ConcensusCommand::help(){
85         try {
86                 m->mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
87                 m->mothurOut("The consensus command has no parameters.\n");
88                 m->mothurOut("The consensus command should be in the following format: consensus().\n");
89                 m->mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
90                 m->mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
91                 m->mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
92                 m->mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n");
93                 m->mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");               
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ConcensusCommand", "help");
97                 exit(1);
98         }
99 }
100
101 //**********************************************************************************************************************
102
103 ConcensusCommand::~ConcensusCommand(){}
104
105 //**********************************************************************************************************************
106
107 int ConcensusCommand::execute(){
108         try {
109                 
110                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
111                 else {  
112                         numNodes = t[0]->getNumNodes();
113                         numLeaves = t[0]->getNumLeaves();
114                 }
115                 
116                 //get the possible pairings
117                 getSets();      
118                 
119                 if (m->control_pressed) { return 0; }
120                 
121                 //open file for pairing not included in the tree
122                 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded);  outputTypes["nodepairs"].push_back(notIncluded);
123                 m->openOutputFile(notIncluded, out2);
124                 
125                 consensusTree = new Tree();
126                 
127                 it2 = nodePairs.find(treeSet);
128                 
129                 nodePairsInTree[treeSet] = it2->second; 
130                 
131                 //erase treeset because you are adding it
132                 nodePairs.erase(treeSet);
133                 
134                 //set count to numLeaves;
135                 count = numLeaves;
136                 
137                 buildConcensusTree(treeSet);
138                 
139                 if (m->control_pressed) { delete consensusTree; return 0; }
140                 
141                 consensusTree->assembleTree();
142                 
143                 if (m->control_pressed) { delete consensusTree; return 0; }
144                 
145                 //output species in order
146                 out2 << "Species in Order: " << endl << endl;
147                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
148                 
149                 //output sets included
150                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
151                 
152                 if (m->control_pressed) { delete consensusTree; return 0; }
153                 
154                 vector<string> temp;
155                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
156                 
157                         if (m->control_pressed) { delete consensusTree; return 0; }
158                         
159                         //only output pairs not leaves
160                         if (it2->first.size() > 1) { 
161                                 temp.clear();
162                                 //initialize temp to all "."
163                                 temp.resize(treeSet.size(), ".");
164                                 
165                                 //set the spot in temp that represents it2->first[i] to a "*" 
166                                 for (int i = 0; i < it2->first.size(); i++) {
167                                         //find spot 
168                                         int index = findSpot(it2->first[i]);
169                                         temp[index] = "*";
170                                         //temp[index] = it2->first[i] + "  ";
171                                 }
172                                 
173                                 //output temp
174                                 for (int j = 0; j < temp.size(); j++) { 
175                                         out2 << temp[j];
176                                 }
177                                 out2 << '\t' << it2->second << endl;
178                         }
179                 }
180                 
181                 //output sets not included
182                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
183                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
184                 
185                         if (m->control_pressed) { delete consensusTree; return 0; }
186                         
187                         temp.clear();
188                         //initialize temp to all "."
189                         temp.resize(treeSet.size(), ".");
190                                 
191                         //set the spot in temp that represents it2->first[i] to a "*" 
192                         for (int i = 0; i < it2->first.size(); i++) {
193                                 //find spot 
194                                 int index = findSpot(it2->first[i]);
195                                 temp[index] = "*";
196                         }
197                                 
198                         //output temp
199                         for (int j = 0; j < temp.size(); j++) { 
200                                 out2 << temp[j];
201                         }
202                         out2 << '\t' << it2->second << endl;
203                 }
204                 
205                 outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
206                 m->openOutputFile(outputFile, out);
207                 
208                 consensusTree->print(out, "boot");
209                 
210                 out.close(); out2.close();
211                 
212                 delete consensusTree; 
213                 
214                 //set first tree file as new current treefile
215                 string currentTree = "";
216                 itTypes = outputTypes.find("tree");
217                 if (itTypes != outputTypes.end()) {
218                         if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
219                 }
220                 
221                 return 0;
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "ConcensusCommand", "execute");
225                 exit(1);
226         }
227 }
228
229 //**********************************************************************************************************************
230 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
231         try {
232                 vector<string> leftChildSet;
233                 vector<string> rightChildSet;
234                 
235                 if (m->control_pressed) { return 1; }
236                 
237                 //if you are at a leaf
238                 if (nodeSet.size() == 1) {
239                         //return the vector index of the leaf you are at
240                         return consensusTree->getIndex(nodeSet[0]);
241                 //terminate recursion
242                 }else if (count == numNodes) { return 0; }
243                 else {
244                         //finds best child pair
245                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
246                         int left = buildConcensusTree(leftChildSet);
247                         int right = buildConcensusTree(rightChildSet);
248                         consensusTree->tree[count].setChildren(left, right);
249                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
250                         consensusTree->tree[left].setParent(count);
251                         consensusTree->tree[right].setParent(count);
252                         count++;
253                         return (count-1);
254                 }
255         
256         }
257         catch(exception& e) {
258                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
259                 exit(1);
260         }
261 }
262
263 //**********************************************************************************************************************
264 int ConcensusCommand::getSets() {
265         try {
266                 vector<string> temp;
267                 treeSet.clear();
268                 
269                 //for each tree add the possible pairs you find
270                 for (int i = 0; i < t.size(); i++) {
271                         
272                         //for each non-leaf node get descendant info.
273                         for (int j = numLeaves; j < numNodes; j++) {
274                                 
275                                 if (m->control_pressed) { return 1; }
276                                 
277                                 temp.clear();
278                                 //go through pcounts and pull out descendants
279                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
280                                         temp.push_back(it->first);
281                                 }
282                                 
283                                 //sort temp
284                                 sort(temp.begin(), temp.end());
285                                 
286                                 it2 = nodePairs.find(temp);
287                                 if (it2 != nodePairs.end()) {
288                                         nodePairs[temp]++;
289                                 }else{
290                                         nodePairs[temp] = 1;
291                                 }
292                         }
293                 }
294                 
295                 
296                 //add each leaf to terminate recursion in consensus
297                 //you want the leaves in there but with insignifigant sightings value so it is added last
298                 //for each leaf node get descendant info.
299                 for (int j = 0; j < numLeaves; j++) {
300                 
301                         if (m->control_pressed) { return 1; }
302                         
303                         //only need the first one since leaves have no descendants but themselves
304                         it = t[0]->tree[j].pcount.begin(); 
305                         temp.clear();  temp.push_back(it->first);
306                         
307                         //fill treeSet
308                         treeSet.push_back(it->first);
309                         
310                         //add leaf to list but with sighting value less then all non leaf pairs 
311                         nodePairs[temp] = 0;
312                 }
313
314                 sort(treeSet.begin(), treeSet.end());
315                 
316                 
317                 map< vector<string>, int> nodePairsCopy = nodePairs;
318                 
319                 
320                 //set initial rating on pairs to sightings + subgroup sightings
321                 while (nodePairsCopy.size() != 0) {
322                         if (m->control_pressed) { return 1; }
323                 
324                         vector<string> smallOne = getSmallest(nodePairsCopy);
325                         
326                         int subgrouprate = getSubgroupRating(smallOne);
327                 
328                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
329                         
330                         nodePairsCopy.erase(smallOne);
331                 }
332                 
333                 return 0;
334         }
335         catch(exception& e) {
336                 m->errorOut(e, "ConcensusCommand", "getSets");
337                 exit(1);
338         }
339 }
340 //**********************************************************************************************************************
341 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
342         try{
343                 vector<string> smallest = nodes.begin()->first;
344                 int smallsize = smallest.size();
345                 
346                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
347                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
348                 }
349                 
350                 return smallest;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "ConcensusCommand", "getSmallest");
354                 exit(1);
355         }
356 }
357
358 //**********************************************************************************************************************
359 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
360         try {
361 //cout << "new call " << endl << endl << endl;
362                 vector<string> largest; largest.clear();
363                 rest.clear();
364                 
365                 //if you are just 2 groups
366                 if (bigset.size() == 2)  {   
367                         rest.push_back(bigset[0]);
368                         largest.push_back(bigset[1]);
369                 }else{
370                         rest = bestSplit[bigset][0];
371                         largest = bestSplit[bigset][1];
372                 }
373                 
374                 
375                 //save for printing out later and for branch lengths
376                 nodePairsInTree[rest] = nodePairs[rest];
377                 
378                 //delete whatever set you return because it is no longer available
379                 nodePairs.erase(rest);
380
381                 //save for printing out later and for branch lengths
382                 nodePairsInTree[largest] = nodePairs[largest];
383                 
384                 //delete whatever set you return because it is no longer available
385                 nodePairs.erase(largest);
386                 
387                 return largest;
388         
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
392                 exit(1);
393         }
394 }
395
396 /**********************************************************************************************************************/
397 int ConcensusCommand::getSubgroupRating(vector<string> group) {
398         try {
399                 map< vector<string>, int>::iterator ittemp;
400                 map< vector< vector<string> > , int >::iterator it3;
401                 int rate = 0;
402                 
403                 // ***********************************************************************************//
404                 //1. this function must be called passing it littlest sets to biggest 
405                 //              since it the rating is made from your sighting plus you best splits rating
406                 //2. it saves the top pair to use later
407                 // ***********************************************************************************//
408
409                 
410                 if (group.size() < 3) {  return rate;  }
411                 
412                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
413                 
414                 //go through the sets
415                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
416                         //are you a subset of bigset, then save in possiblePairings
417                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
418                 }               
419         
420                 map< vector< vector<string> > , int > rating;
421                 
422                 while (possiblePairing.size() != 0) {
423                 
424                         it2 = possiblePairing.begin(); 
425                         vector<string> temprest = getRestSet(group, it2->first);
426                         
427                         //is the rest a set available in possiblePairings
428                         ittemp = possiblePairing.find(temprest);
429                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
430                                 vector< vector<string> > temprate;
431                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
432                                 
433                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
434                                 
435                                 //erase so you dont add 1,2 and 2,1.
436                                 possiblePairing.erase(temprest);
437                         }
438                         
439                         possiblePairing.erase(it2);
440                 }
441
442
443                 it3 = rating.begin();
444                 rate = it3->second;
445                 vector< vector<string> > topPair = it3->first;
446                 
447                 //choose the split with the best rating
448                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
449                         
450                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
451                 }
452                 
453                 bestSplit[group] = topPair;
454                 
455                 return rate;
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
459                 exit(1);
460         }
461 }
462
463 //**********************************************************************************************************************
464 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
465         try {
466                 vector<string> rest;
467                 
468                 for (int i = 0; i < bigset.size(); i++) {
469                         bool inSubset = false;
470                         for (int j = 0; j < subset.size(); j++) {
471                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
472                         }
473                         
474                         //its not in the subset so put it in the rest
475                         if (inSubset == false) { rest.push_back(bigset[i]); }
476                 }
477
478                 return rest;
479         
480         }
481         catch(exception& e) {
482                 m->errorOut(e, "ConcensusCommand", "getRestSet");
483                 exit(1);
484         }
485 }
486
487 //**********************************************************************************************************************
488 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
489         try {
490                 
491         
492                 if (subset.size() > bigset.size()) { return false;  }
493                 
494                 //check if each guy in suset is also in bigset
495                 for (int i = 0; i < subset.size(); i++) {
496                         bool match = false;
497                         for (int j = 0; j < bigset.size(); j++) {
498                                 if (subset[i] == bigset[j]) { match = true; break; }
499                         }
500                         
501                         //you have a guy in subset that had no match in bigset
502                         if (match == false) { return false; }
503                 }
504                 
505                 return true;
506         
507         }
508         catch(exception& e) {
509                 m->errorOut(e, "ConcensusCommand", "isSubset");
510                 exit(1);
511         }
512 }
513 //**********************************************************************************************************************
514 int ConcensusCommand::findSpot(string node) {
515         try {
516                 int spot;
517                 
518                 //check if each guy in suset is also in bigset
519                 for (int i = 0; i < treeSet.size(); i++) {
520                         if (treeSet[i] == node) { spot = i; break; }
521                 }
522                 
523                 return spot;
524         
525         }
526         catch(exception& e) {
527                 m->errorOut(e, "ConcensusCommand", "findSpot");
528                 exit(1);
529         }
530 }
531 //**********************************************************************************************************************
532
533
534
535