]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::setParameters(){       
14         try {
15                 vector<string> myArray;
16                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ConcensusCommand", "setParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 string ConcensusCommand::getHelpString(){       
26         try {
27                 string helpString = "";
28                 helpString += "The consensus command can only be executed after a successful read.tree command.\n";
29                 helpString += "The consensus command has no parameters.\n";
30                 helpString += "The consensus command should be in the following format: consensus().\n";
31                 helpString += "The consensus command output two files: .consensus.tre and .consensuspairs.\n";
32                 helpString += "The .consensus.tre file contains the consensus tree of the trees in your input file.\n";
33                 helpString += "The branch lengths are the percentage of trees in your input file that had the given pair.\n";
34                 helpString += "The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n";
35                 helpString += "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n";         
36                 return helpString;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "ConcensusCommand", "getHelpString");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 ConcensusCommand::ConcensusCommand(){   
45         try {
46                 abort = true; calledHelp = true; 
47                 setParameters();
48                 vector<string> tempOutNames;
49                 outputTypes["tree"] = tempOutNames;
50                 outputTypes["nodepairs"] = tempOutNames;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 ConcensusCommand::ConcensusCommand(string fileroot)  {
60         try {
61                 abort = false; calledHelp = false;   
62                 
63                 setParameters();
64                 
65                 //initialize outputTypes
66                 vector<string> tempOutNames;
67                 outputTypes["tree"] = tempOutNames;
68                 outputTypes["nodepairs"] = tempOutNames;
69                 
70                 filename = fileroot;
71         
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79
80 int ConcensusCommand::execute(){
81         try {
82                 
83                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
84                 
85                 m->mothurOut("This command is not currently in use."); m->mothurOutEndLine();
86         /*      
87                 t = globaldata->gTree;
88                 numNodes = t[0]->getNumNodes();
89                 numLeaves = t[0]->getNumLeaves();
90                 
91                 
92                 //get the possible pairings
93                 getSets();      
94                 
95                 if (m->control_pressed) { return 0; }
96                 
97                 //open file for pairing not included in the tree
98                 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded);  outputTypes["nodepairs"].push_back(notIncluded);
99                 m->openOutputFile(notIncluded, out2);
100                 
101                 consensusTree = new Tree();
102                 
103                 it2 = nodePairs.find(treeSet);
104                 
105                 nodePairsInTree[treeSet] = it2->second; 
106                 
107                 //erase treeset because you are adding it
108                 nodePairs.erase(treeSet);
109                 
110                 //set count to numLeaves;
111                 count = numLeaves;
112                 
113                 buildConcensusTree(treeSet);
114                 
115                 if (m->control_pressed) { delete consensusTree; return 0; }
116                 
117                 consensusTree->assembleTree();
118                 
119                 if (m->control_pressed) { delete consensusTree; return 0; }
120                 
121                 //output species in order
122                 out2 << "Species in Order: " << endl << endl;
123                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
124                 
125                 //output sets included
126                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
127                 
128                 if (m->control_pressed) { delete consensusTree; return 0; }
129                 
130                 vector<string> temp;
131                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
132                 
133                         if (m->control_pressed) { delete consensusTree; return 0; }
134                         
135                         //only output pairs not leaves
136                         if (it2->first.size() > 1) { 
137                                 temp.clear();
138                                 //initialize temp to all "."
139                                 temp.resize(treeSet.size(), ".");
140                                 
141                                 //set the spot in temp that represents it2->first[i] to a "*" 
142                                 for (int i = 0; i < it2->first.size(); i++) {
143                                         //find spot 
144                                         int index = findSpot(it2->first[i]);
145                                         temp[index] = "*";
146                                         //temp[index] = it2->first[i] + "  ";
147                                 }
148                                 
149                                 //output temp
150                                 for (int j = 0; j < temp.size(); j++) { 
151                                         out2 << temp[j];
152                                 }
153                                 out2 << '\t' << it2->second << endl;
154                         }
155                 }
156                 
157                 //output sets not included
158                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
159                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
160                 
161                         if (m->control_pressed) { delete consensusTree; return 0; }
162                         
163                         temp.clear();
164                         //initialize temp to all "."
165                         temp.resize(treeSet.size(), ".");
166                                 
167                         //set the spot in temp that represents it2->first[i] to a "*" 
168                         for (int i = 0; i < it2->first.size(); i++) {
169                                 //find spot 
170                                 int index = findSpot(it2->first[i]);
171                                 temp[index] = "*";
172                         }
173                                 
174                         //output temp
175                         for (int j = 0; j < temp.size(); j++) { 
176                                 out2 << temp[j];
177                         }
178                         out2 << '\t' << it2->second << endl;
179                 }
180                 
181                 outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
182                 m->openOutputFile(outputFile, out);
183                 
184                 consensusTree->print(out, "boot");
185                 
186                 out.close(); out2.close();
187                 
188                 delete consensusTree; 
189                 
190                 //set first tree file as new current treefile
191                 string currentTree = "";
192                 itTypes = outputTypes.find("tree");
193                 if (itTypes != outputTypes.end()) {
194                         if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
195                 }
196                 */
197                 return 0;
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "ConcensusCommand", "execute");
201                 exit(1);
202         }
203 }
204
205 //**********************************************************************************************************************
206 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
207         try {
208                 vector<string> leftChildSet;
209                 vector<string> rightChildSet;
210                 
211                 if (m->control_pressed) { return 1; }
212                 
213                 //if you are at a leaf
214                 if (nodeSet.size() == 1) {
215                         //return the vector index of the leaf you are at
216                         return consensusTree->getIndex(nodeSet[0]);
217                 //terminate recursion
218                 }else if (count == numNodes) { return 0; }
219                 else {
220                         //finds best child pair
221                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
222                         int left = buildConcensusTree(leftChildSet);
223                         int right = buildConcensusTree(rightChildSet);
224                         consensusTree->tree[count].setChildren(left, right);
225                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
226                         consensusTree->tree[left].setParent(count);
227                         consensusTree->tree[right].setParent(count);
228                         count++;
229                         return (count-1);
230                 }
231         
232         }
233         catch(exception& e) {
234                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
235                 exit(1);
236         }
237 }
238
239 //**********************************************************************************************************************
240 int ConcensusCommand::getSets() {
241         try {
242                 vector<string> temp;
243                 treeSet.clear();
244                 
245                 //for each tree add the possible pairs you find
246                 for (int i = 0; i < t.size(); i++) {
247                         
248                         //for each non-leaf node get descendant info.
249                         for (int j = numLeaves; j < numNodes; j++) {
250                                 
251                                 if (m->control_pressed) { return 1; }
252                                 
253                                 temp.clear();
254                                 //go through pcounts and pull out descendants
255                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
256                                         temp.push_back(it->first);
257                                 }
258                                 
259                                 //sort temp
260                                 sort(temp.begin(), temp.end());
261                                 
262                                 it2 = nodePairs.find(temp);
263                                 if (it2 != nodePairs.end()) {
264                                         nodePairs[temp]++;
265                                 }else{
266                                         nodePairs[temp] = 1;
267                                 }
268                         }
269                 }
270                 
271                 
272                 //add each leaf to terminate recursion in consensus
273                 //you want the leaves in there but with insignifigant sightings value so it is added last
274                 //for each leaf node get descendant info.
275                 for (int j = 0; j < numLeaves; j++) {
276                 
277                         if (m->control_pressed) { return 1; }
278                         
279                         //only need the first one since leaves have no descendants but themselves
280                         it = t[0]->tree[j].pcount.begin(); 
281                         temp.clear();  temp.push_back(it->first);
282                         
283                         //fill treeSet
284                         treeSet.push_back(it->first);
285                         
286                         //add leaf to list but with sighting value less then all non leaf pairs 
287                         nodePairs[temp] = 0;
288                 }
289
290                 sort(treeSet.begin(), treeSet.end());
291                 
292                 
293                 map< vector<string>, int> nodePairsCopy = nodePairs;
294                 
295                 
296                 //set initial rating on pairs to sightings + subgroup sightings
297                 while (nodePairsCopy.size() != 0) {
298                         if (m->control_pressed) { return 1; }
299                 
300                         vector<string> smallOne = getSmallest(nodePairsCopy);
301                         
302                         int subgrouprate = getSubgroupRating(smallOne);
303                 
304                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
305                         
306                         nodePairsCopy.erase(smallOne);
307                 }
308                 
309                 return 0;
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "ConcensusCommand", "getSets");
313                 exit(1);
314         }
315 }
316 //**********************************************************************************************************************
317 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
318         try{
319                 vector<string> smallest = nodes.begin()->first;
320                 int smallsize = smallest.size();
321                 
322                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
323                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
324                 }
325                 
326                 return smallest;
327         }
328         catch(exception& e) {
329                 m->errorOut(e, "ConcensusCommand", "getSmallest");
330                 exit(1);
331         }
332 }
333
334 //**********************************************************************************************************************
335 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
336         try {
337 //cout << "new call " << endl << endl << endl;
338                 vector<string> largest; largest.clear();
339                 rest.clear();
340                 
341                 //if you are just 2 groups
342                 if (bigset.size() == 2)  {   
343                         rest.push_back(bigset[0]);
344                         largest.push_back(bigset[1]);
345                 }else{
346                         rest = bestSplit[bigset][0];
347                         largest = bestSplit[bigset][1];
348                 }
349                 
350                 
351                 //save for printing out later and for branch lengths
352                 nodePairsInTree[rest] = nodePairs[rest];
353                 
354                 //delete whatever set you return because it is no longer available
355                 nodePairs.erase(rest);
356
357                 //save for printing out later and for branch lengths
358                 nodePairsInTree[largest] = nodePairs[largest];
359                 
360                 //delete whatever set you return because it is no longer available
361                 nodePairs.erase(largest);
362                 
363                 return largest;
364         
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
368                 exit(1);
369         }
370 }
371
372 /**********************************************************************************************************************/
373 int ConcensusCommand::getSubgroupRating(vector<string> group) {
374         try {
375                 map< vector<string>, int>::iterator ittemp;
376                 map< vector< vector<string> > , int >::iterator it3;
377                 int rate = 0;
378                 
379                 // ***********************************************************************************//
380                 //1. this function must be called passing it littlest sets to biggest 
381                 //              since it the rating is made from your sighting plus you best splits rating
382                 //2. it saves the top pair to use later
383                 // ***********************************************************************************//
384
385                 
386                 if (group.size() < 3) {  return rate;  }
387                 
388                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
389                 
390                 //go through the sets
391                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
392                         //are you a subset of bigset, then save in possiblePairings
393                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
394                 }               
395         
396                 map< vector< vector<string> > , int > rating;
397                 
398                 while (possiblePairing.size() != 0) {
399                 
400                         it2 = possiblePairing.begin(); 
401                         vector<string> temprest = getRestSet(group, it2->first);
402                         
403                         //is the rest a set available in possiblePairings
404                         ittemp = possiblePairing.find(temprest);
405                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
406                                 vector< vector<string> > temprate;
407                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
408                                 
409                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
410                                 
411                                 //erase so you dont add 1,2 and 2,1.
412                                 possiblePairing.erase(temprest);
413                         }
414                         
415                         possiblePairing.erase(it2);
416                 }
417
418
419                 it3 = rating.begin();
420                 rate = it3->second;
421                 vector< vector<string> > topPair = it3->first;
422                 
423                 //choose the split with the best rating
424                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
425                         
426                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
427                 }
428                 
429                 bestSplit[group] = topPair;
430                 
431                 return rate;
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
435                 exit(1);
436         }
437 }
438
439 //**********************************************************************************************************************
440 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
441         try {
442                 vector<string> rest;
443                 
444                 for (int i = 0; i < bigset.size(); i++) {
445                         bool inSubset = false;
446                         for (int j = 0; j < subset.size(); j++) {
447                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
448                         }
449                         
450                         //its not in the subset so put it in the rest
451                         if (inSubset == false) { rest.push_back(bigset[i]); }
452                 }
453
454                 return rest;
455         
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "ConcensusCommand", "getRestSet");
459                 exit(1);
460         }
461 }
462
463 //**********************************************************************************************************************
464 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
465         try {
466                 
467         
468                 if (subset.size() > bigset.size()) { return false;  }
469                 
470                 //check if each guy in suset is also in bigset
471                 for (int i = 0; i < subset.size(); i++) {
472                         bool match = false;
473                         for (int j = 0; j < bigset.size(); j++) {
474                                 if (subset[i] == bigset[j]) { match = true; break; }
475                         }
476                         
477                         //you have a guy in subset that had no match in bigset
478                         if (match == false) { return false; }
479                 }
480                 
481                 return true;
482         
483         }
484         catch(exception& e) {
485                 m->errorOut(e, "ConcensusCommand", "isSubset");
486                 exit(1);
487         }
488 }
489 //**********************************************************************************************************************
490 int ConcensusCommand::findSpot(string node) {
491         try {
492                 int spot;
493                 
494                 //check if each guy in suset is also in bigset
495                 for (int i = 0; i < treeSet.size(); i++) {
496                         if (treeSet[i] == node) { spot = i; break; }
497                 }
498                 
499                 return spot;
500         
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "ConcensusCommand", "findSpot");
504                 exit(1);
505         }
506 }
507 //**********************************************************************************************************************
508
509
510
511