]> git.donarmstrong.com Git - mothur.git/blob - consensuscommand.cpp
added citation function to commands
[mothur.git] / consensuscommand.cpp
1 /*
2  *  consensuscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/29/09.
6  *  Copyright 2009 Schloss Lab UMASS AMherst. All rights reserved.
7  *
8  */
9
10 #include "consensuscommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> ConcensusCommand::setParameters(){       
14         try {
15                 vector<string> myArray;
16                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "ConcensusCommand", "setParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 string ConcensusCommand::getHelpString(){       
26         try {
27                 string helpString = "";
28                 helpString += "The consensus command can only be executed after a successful read.tree command.\n";
29                 helpString += "The consensus command has no parameters.\n";
30                 helpString += "The consensus command should be in the following format: consensus().\n";
31                 helpString += "The consensus command output two files: .consensus.tre and .consensuspairs.\n";
32                 helpString += "The .consensus.tre file contains the consensus tree of the trees in your input file.\n";
33                 helpString += "The branch lengths are the percentage of trees in your input file that had the given pair.\n";
34                 helpString += "The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n";
35                 helpString += "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n";         
36                 return helpString;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "ConcensusCommand", "getHelpString");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 ConcensusCommand::ConcensusCommand(){   
45         try {
46                 abort = true; calledHelp = true; 
47                 setParameters();
48                 vector<string> tempOutNames;
49                 outputTypes["tree"] = tempOutNames;
50                 outputTypes["nodepairs"] = tempOutNames;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 ConcensusCommand::ConcensusCommand(string fileroot)  {
60         try {
61                 abort = false; calledHelp = false;   
62                 
63                 setParameters();
64                 
65                 //initialize outputTypes
66                 vector<string> tempOutNames;
67                 outputTypes["tree"] = tempOutNames;
68                 outputTypes["nodepairs"] = tempOutNames;
69                 
70                 filename = fileroot;
71         
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79
80 int ConcensusCommand::execute(){
81         try {
82                 
83                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
84                 
85                 
86                 m->mothurOut("This command is not currently in use."); m->mothurOutEndLine();
87         /*      
88                 t = globaldata->gTree;
89                 numNodes = t[0]->getNumNodes();
90                 numLeaves = t[0]->getNumLeaves();
91                 
92                 
93                 //get the possible pairings
94                 getSets();      
95                 
96                 if (m->control_pressed) { return 0; }
97                 
98                 //open file for pairing not included in the tree
99                 notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded);  outputTypes["nodepairs"].push_back(notIncluded);
100                 m->openOutputFile(notIncluded, out2);
101                 
102                 consensusTree = new Tree();
103                 
104                 it2 = nodePairs.find(treeSet);
105                 
106                 nodePairsInTree[treeSet] = it2->second; 
107                 
108                 //erase treeset because you are adding it
109                 nodePairs.erase(treeSet);
110                 
111                 //set count to numLeaves;
112                 count = numLeaves;
113                 
114                 buildConcensusTree(treeSet);
115                 
116                 if (m->control_pressed) { delete consensusTree; return 0; }
117                 
118                 consensusTree->assembleTree();
119                 
120                 if (m->control_pressed) { delete consensusTree; return 0; }
121                 
122                 //output species in order
123                 out2 << "Species in Order: " << endl << endl;
124                 for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
125                 
126                 //output sets included
127                 out2 << endl << "Sets included in the consensus tree:" << endl << endl;
128                 
129                 if (m->control_pressed) { delete consensusTree; return 0; }
130                 
131                 vector<string> temp;
132                 for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
133                 
134                         if (m->control_pressed) { delete consensusTree; return 0; }
135                         
136                         //only output pairs not leaves
137                         if (it2->first.size() > 1) { 
138                                 temp.clear();
139                                 //initialize temp to all "."
140                                 temp.resize(treeSet.size(), ".");
141                                 
142                                 //set the spot in temp that represents it2->first[i] to a "*" 
143                                 for (int i = 0; i < it2->first.size(); i++) {
144                                         //find spot 
145                                         int index = findSpot(it2->first[i]);
146                                         temp[index] = "*";
147                                         //temp[index] = it2->first[i] + "  ";
148                                 }
149                                 
150                                 //output temp
151                                 for (int j = 0; j < temp.size(); j++) { 
152                                         out2 << temp[j];
153                                 }
154                                 out2 << '\t' << it2->second << endl;
155                         }
156                 }
157                 
158                 //output sets not included
159                 out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
160                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
161                 
162                         if (m->control_pressed) { delete consensusTree; return 0; }
163                         
164                         temp.clear();
165                         //initialize temp to all "."
166                         temp.resize(treeSet.size(), ".");
167                                 
168                         //set the spot in temp that represents it2->first[i] to a "*" 
169                         for (int i = 0; i < it2->first.size(); i++) {
170                                 //find spot 
171                                 int index = findSpot(it2->first[i]);
172                                 temp[index] = "*";
173                         }
174                                 
175                         //output temp
176                         for (int j = 0; j < temp.size(); j++) { 
177                                 out2 << temp[j];
178                         }
179                         out2 << '\t' << it2->second << endl;
180                 }
181                 
182                 outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
183                 m->openOutputFile(outputFile, out);
184                 
185                 consensusTree->print(out, "boot");
186                 
187                 out.close(); out2.close();
188                 
189                 delete consensusTree; 
190                 
191                 //set first tree file as new current treefile
192                 string currentTree = "";
193                 itTypes = outputTypes.find("tree");
194                 if (itTypes != outputTypes.end()) {
195                         if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
196                 }
197                 */
198                 return 0;
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "ConcensusCommand", "execute");
202                 exit(1);
203         }
204 }
205
206 //**********************************************************************************************************************
207 int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
208         try {
209                 vector<string> leftChildSet;
210                 vector<string> rightChildSet;
211                 
212                 if (m->control_pressed) { return 1; }
213                 
214                 //if you are at a leaf
215                 if (nodeSet.size() == 1) {
216                         //return the vector index of the leaf you are at
217                         return consensusTree->getIndex(nodeSet[0]);
218                 //terminate recursion
219                 }else if (count == numNodes) { return 0; }
220                 else {
221                         //finds best child pair
222                         leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
223                         int left = buildConcensusTree(leftChildSet);
224                         int right = buildConcensusTree(rightChildSet);
225                         consensusTree->tree[count].setChildren(left, right);
226                         consensusTree->tree[count].setLabel(nodePairsInTree[nodeSet]); 
227                         consensusTree->tree[left].setParent(count);
228                         consensusTree->tree[right].setParent(count);
229                         count++;
230                         return (count-1);
231                 }
232         
233         }
234         catch(exception& e) {
235                 m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
236                 exit(1);
237         }
238 }
239
240 //**********************************************************************************************************************
241 int ConcensusCommand::getSets() {
242         try {
243                 vector<string> temp;
244                 treeSet.clear();
245                 
246                 //for each tree add the possible pairs you find
247                 for (int i = 0; i < t.size(); i++) {
248                         
249                         //for each non-leaf node get descendant info.
250                         for (int j = numLeaves; j < numNodes; j++) {
251                                 
252                                 if (m->control_pressed) { return 1; }
253                                 
254                                 temp.clear();
255                                 //go through pcounts and pull out descendants
256                                 for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
257                                         temp.push_back(it->first);
258                                 }
259                                 
260                                 //sort temp
261                                 sort(temp.begin(), temp.end());
262                                 
263                                 it2 = nodePairs.find(temp);
264                                 if (it2 != nodePairs.end()) {
265                                         nodePairs[temp]++;
266                                 }else{
267                                         nodePairs[temp] = 1;
268                                 }
269                         }
270                 }
271                 
272                 
273                 //add each leaf to terminate recursion in consensus
274                 //you want the leaves in there but with insignifigant sightings value so it is added last
275                 //for each leaf node get descendant info.
276                 for (int j = 0; j < numLeaves; j++) {
277                 
278                         if (m->control_pressed) { return 1; }
279                         
280                         //only need the first one since leaves have no descendants but themselves
281                         it = t[0]->tree[j].pcount.begin(); 
282                         temp.clear();  temp.push_back(it->first);
283                         
284                         //fill treeSet
285                         treeSet.push_back(it->first);
286                         
287                         //add leaf to list but with sighting value less then all non leaf pairs 
288                         nodePairs[temp] = 0;
289                 }
290
291                 sort(treeSet.begin(), treeSet.end());
292                 
293                 
294                 map< vector<string>, int> nodePairsCopy = nodePairs;
295                 
296                 
297                 //set initial rating on pairs to sightings + subgroup sightings
298                 while (nodePairsCopy.size() != 0) {
299                         if (m->control_pressed) { return 1; }
300                 
301                         vector<string> smallOne = getSmallest(nodePairsCopy);
302                         
303                         int subgrouprate = getSubgroupRating(smallOne);
304                 
305                         nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
306                         
307                         nodePairsCopy.erase(smallOne);
308                 }
309                 
310                 return 0;
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "ConcensusCommand", "getSets");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************
318 vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
319         try{
320                 vector<string> smallest = nodes.begin()->first;
321                 int smallsize = smallest.size();
322                 
323                 for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
324                         if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
325                 }
326                 
327                 return smallest;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "ConcensusCommand", "getSmallest");
331                 exit(1);
332         }
333 }
334
335 //**********************************************************************************************************************
336 vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
337         try {
338 //cout << "new call " << endl << endl << endl;
339                 vector<string> largest; largest.clear();
340                 rest.clear();
341                 
342                 //if you are just 2 groups
343                 if (bigset.size() == 2)  {   
344                         rest.push_back(bigset[0]);
345                         largest.push_back(bigset[1]);
346                 }else{
347                         rest = bestSplit[bigset][0];
348                         largest = bestSplit[bigset][1];
349                 }
350                 
351                 
352                 //save for printing out later and for branch lengths
353                 nodePairsInTree[rest] = nodePairs[rest];
354                 
355                 //delete whatever set you return because it is no longer available
356                 nodePairs.erase(rest);
357
358                 //save for printing out later and for branch lengths
359                 nodePairsInTree[largest] = nodePairs[largest];
360                 
361                 //delete whatever set you return because it is no longer available
362                 nodePairs.erase(largest);
363                 
364                 return largest;
365         
366         }
367         catch(exception& e) {
368                 m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
369                 exit(1);
370         }
371 }
372
373 /**********************************************************************************************************************/
374 int ConcensusCommand::getSubgroupRating(vector<string> group) {
375         try {
376                 map< vector<string>, int>::iterator ittemp;
377                 map< vector< vector<string> > , int >::iterator it3;
378                 int rate = 0;
379                 
380                 // ***********************************************************************************//
381                 //1. this function must be called passing it littlest sets to biggest 
382                 //              since it the rating is made from your sighting plus you best splits rating
383                 //2. it saves the top pair to use later
384                 // ***********************************************************************************//
385
386                 
387                 if (group.size() < 3) {  return rate;  }
388                 
389                 map< vector<string>, int> possiblePairing;  //this is all the subsets of group
390                 
391                 //go through the sets
392                 for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
393                         //are you a subset of bigset, then save in possiblePairings
394                         if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
395                 }               
396         
397                 map< vector< vector<string> > , int > rating;
398                 
399                 while (possiblePairing.size() != 0) {
400                 
401                         it2 = possiblePairing.begin(); 
402                         vector<string> temprest = getRestSet(group, it2->first);
403                         
404                         //is the rest a set available in possiblePairings
405                         ittemp = possiblePairing.find(temprest);
406                         if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
407                                 vector< vector<string> > temprate;
408                                 temprate.push_back(it2->first);  temprate.push_back(temprest);
409                                 
410                                 rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
411                                 
412                                 //erase so you dont add 1,2 and 2,1.
413                                 possiblePairing.erase(temprest);
414                         }
415                         
416                         possiblePairing.erase(it2);
417                 }
418
419
420                 it3 = rating.begin();
421                 rate = it3->second;
422                 vector< vector<string> > topPair = it3->first;
423                 
424                 //choose the split with the best rating
425                 for (it3 = rating.begin(); it3 != rating.end(); it3++) {
426                         
427                         if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
428                 }
429                 
430                 bestSplit[group] = topPair;
431                 
432                 return rate;
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
436                 exit(1);
437         }
438 }
439
440 //**********************************************************************************************************************
441 vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
442         try {
443                 vector<string> rest;
444                 
445                 for (int i = 0; i < bigset.size(); i++) {
446                         bool inSubset = false;
447                         for (int j = 0; j < subset.size(); j++) {
448                                 if (bigset[i] == subset[j]) { inSubset = true; break; }
449                         }
450                         
451                         //its not in the subset so put it in the rest
452                         if (inSubset == false) { rest.push_back(bigset[i]); }
453                 }
454
455                 return rest;
456         
457         }
458         catch(exception& e) {
459                 m->errorOut(e, "ConcensusCommand", "getRestSet");
460                 exit(1);
461         }
462 }
463
464 //**********************************************************************************************************************
465 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
466         try {
467                 
468         
469                 if (subset.size() > bigset.size()) { return false;  }
470                 
471                 //check if each guy in suset is also in bigset
472                 for (int i = 0; i < subset.size(); i++) {
473                         bool match = false;
474                         for (int j = 0; j < bigset.size(); j++) {
475                                 if (subset[i] == bigset[j]) { match = true; break; }
476                         }
477                         
478                         //you have a guy in subset that had no match in bigset
479                         if (match == false) { return false; }
480                 }
481                 
482                 return true;
483         
484         }
485         catch(exception& e) {
486                 m->errorOut(e, "ConcensusCommand", "isSubset");
487                 exit(1);
488         }
489 }
490 //**********************************************************************************************************************
491 int ConcensusCommand::findSpot(string node) {
492         try {
493                 int spot;
494                 
495                 //check if each guy in suset is also in bigset
496                 for (int i = 0; i < treeSet.size(); i++) {
497                         if (treeSet[i] == node) { spot = i; break; }
498                 }
499                 
500                 return spot;
501         
502         }
503         catch(exception& e) {
504                 m->errorOut(e, "ConcensusCommand", "findSpot");
505                 exit(1);
506         }
507 }
508 //**********************************************************************************************************************
509
510
511
512