]> git.donarmstrong.com Git - mothur.git/blobdiff - consensuscommand.cpp
added otu.association command. added calcSpearman, calcKendall and calcPearson functi...
[mothur.git] / consensuscommand.cpp
index 9a90219d14c3809a687e053851440971eecb2e9b..e522b098f0703d6909f1906c3039fb4459e99854 100644 (file)
 #include "consensuscommand.h"
 
 //**********************************************************************************************************************
-
-ConcensusCommand::ConcensusCommand(string fileroot){
+vector<string> ConcensusCommand::setParameters(){      
        try {
-               globaldata = GlobalData::getInstance();
-               abort = false;
-               
-               filename = fileroot;
-               //allow user to run help
-               //if(option == "help") { help(); abort = true; }
-               
-               //else {
-                       //if (option != "") { mothurOut("There are no valid parameters for the consensus command."); mothurOutEndLine(); abort = true; }
-                       
-               //      //no trees were read
-               //      if (globaldata->gTree.size() == 0) {  mothurOut("You must execute the read.tree command, before you may use the consensus command."); mothurOutEndLine(); abort = true;  }
-                       //else { 
-                        t = globaldata->gTree;
-                        //     }
-               //}
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "ConcensusCommand");
+               m->errorOut(e, "ConcensusCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-void ConcensusCommand::help(){
+string ConcensusCommand::getHelpString(){      
        try {
-               mothurOut("The consensus command can only be executed after a successful read.tree command.\n");
-               mothurOut("The consensus command has no parameters.\n");
-               mothurOut("The consensus command should be in the following format: consensus().\n");
-               mothurOut("The consensus command output two files: .consensus.tre and .consensuspairs.\n");
-               mothurOut("The .consensus.tre file contains the consensus tree of the trees in your input file.\n");
-               mothurOut("The branch lengths are the percentage of trees in your input file that had the given pair.\n");
-               mothurOut("The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n");
-               mothurOut("is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n\n");          
+               string helpString = "";
+               helpString += "The consensus command can only be executed after a successful read.tree command.\n";
+               helpString += "The consensus command has no parameters.\n";
+               helpString += "The consensus command should be in the following format: consensus().\n";
+               helpString += "The consensus command output two files: .consensus.tre and .consensuspairs.\n";
+               helpString += "The .consensus.tre file contains the consensus tree of the trees in your input file.\n";
+               helpString += "The branch lengths are the percentage of trees in your input file that had the given pair.\n";
+               helpString += "The .consensuspairs file contains a list of the internal nodes in your tree.  For each node, the pair that was used in the consensus tree \n";
+               helpString += "is reported with its percentage, as well as the other pairs that were seen for that node but not used and their percentages.\n";         
+               return helpString;
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "help");
+               m->errorOut(e, "ConcensusCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ConcensusCommand::ConcensusCommand(){  
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["tree"] = tempOutNames;
+               outputTypes["nodepairs"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
 
-ConcensusCommand::~ConcensusCommand(){}
-
+ConcensusCommand::ConcensusCommand(string fileroot)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               setParameters();
+               
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["tree"] = tempOutNames;
+               outputTypes["nodepairs"] = tempOutNames;
+               
+               filename = fileroot;
+       
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ConcensusCommand", "ConcensusCommand");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 
 int ConcensusCommand::execute(){
        try {
                
-               if (abort == true) { return 0; }
-               else {  
-                       numNodes = t[0]->getNumNodes();
-                       numLeaves = t[0]->getNumLeaves();
-               }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+               
+               m->mothurOut("This command is not currently in use."); m->mothurOutEndLine();
+       /*      
+               t = globaldata->gTree;
+               numNodes = t[0]->getNumNodes();
+               numLeaves = t[0]->getNumLeaves();
+               
                
                //get the possible pairings
-               getSets();              
+               getSets();      
+               
+               if (m->control_pressed) { return 0; }
                
                //open file for pairing not included in the tree
-               notIncluded = filename + ".cons.pairs";
-               openOutputFile(notIncluded, out2);
+               notIncluded = filename + ".cons.pairs"; outputNames.push_back(notIncluded);  outputTypes["nodepairs"].push_back(notIncluded);
+               m->openOutputFile(notIncluded, out2);
                
                consensusTree = new Tree();
                
@@ -91,16 +113,26 @@ int ConcensusCommand::execute(){
                
                buildConcensusTree(treeSet);
                
+               if (m->control_pressed) { delete consensusTree; return 0; }
+               
                consensusTree->assembleTree();
                
+               if (m->control_pressed) { delete consensusTree; return 0; }
+               
                //output species in order
                out2 << "Species in Order: " << endl << endl;
                for (int i = 0; i < treeSet.size(); i++) {  out2 << i+1 << ".  " << treeSet[i] << endl; }
                
-               vector<string> temp; 
                //output sets included
                out2 << endl << "Sets included in the consensus tree:" << endl << endl;
+               
+               if (m->control_pressed) { delete consensusTree; return 0; }
+               
+               vector<string> temp;
                for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
+               
+                       if (m->control_pressed) { delete consensusTree; return 0; }
+                       
                        //only output pairs not leaves
                        if (it2->first.size() > 1) { 
                                temp.clear();
@@ -112,6 +144,7 @@ int ConcensusCommand::execute(){
                                        //find spot 
                                        int index = findSpot(it2->first[i]);
                                        temp[index] = "*";
+                                       //temp[index] = it2->first[i] + "  ";
                                }
                                
                                //output temp
@@ -125,6 +158,9 @@ int ConcensusCommand::execute(){
                //output sets not included
                out2 << endl << "Sets NOT included in the consensus tree:" << endl << endl;
                for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+               
+                       if (m->control_pressed) { delete consensusTree; return 0; }
+                       
                        temp.clear();
                        //initialize temp to all "."
                        temp.resize(treeSet.size(), ".");
@@ -143,19 +179,26 @@ int ConcensusCommand::execute(){
                        out2 << '\t' << it2->second << endl;
                }
                
-               outputFile = filename + ".cons.tre";
-               openOutputFile(outputFile, out);
+               outputFile = filename + ".cons.tre";  outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
+               m->openOutputFile(outputFile, out);
                
-               consensusTree->printForBoot(out);
+               consensusTree->print(out, "boot");
                
                out.close(); out2.close();
                
                delete consensusTree; 
                
+               //set first tree file as new current treefile
+               string currentTree = "";
+               itTypes = outputTypes.find("tree");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { currentTree = (itTypes->second)[0]; m->setTreeFile(currentTree); }
+               }
+               */
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "execute");
+               m->errorOut(e, "ConcensusCommand", "execute");
                exit(1);
        }
 }
@@ -166,6 +209,8 @@ int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
                vector<string> leftChildSet;
                vector<string> rightChildSet;
                
+               if (m->control_pressed) { return 1; }
+               
                //if you are at a leaf
                if (nodeSet.size() == 1) {
                        //return the vector index of the leaf you are at
@@ -173,8 +218,8 @@ int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
                //terminate recursion
                }else if (count == numNodes) { return 0; }
                else {
-                       leftChildSet = getNextAvailableSet(nodeSet);
-                       rightChildSet = getRestSet(nodeSet, leftChildSet);
+                       //finds best child pair
+                       leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
                        int left = buildConcensusTree(leftChildSet);
                        int right = buildConcensusTree(rightChildSet);
                        consensusTree->tree[count].setChildren(left, right);
@@ -187,13 +232,13 @@ int ConcensusCommand::buildConcensusTree(vector<string> nodeSet) {
        
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "buildConcensusTree");
+               m->errorOut(e, "ConcensusCommand", "buildConcensusTree");
                exit(1);
        }
 }
 
 //**********************************************************************************************************************
-void ConcensusCommand::getSets() {
+int ConcensusCommand::getSets() {
        try {
                vector<string> temp;
                treeSet.clear();
@@ -203,6 +248,9 @@ void ConcensusCommand::getSets() {
                        
                        //for each non-leaf node get descendant info.
                        for (int j = numLeaves; j < numNodes; j++) {
+                               
+                               if (m->control_pressed) { return 1; }
+                               
                                temp.clear();
                                //go through pcounts and pull out descendants
                                for (it = t[i]->tree[j].pcount.begin(); it != t[i]->tree[j].pcount.end(); it++) {
@@ -221,10 +269,13 @@ void ConcensusCommand::getSets() {
                        }
                }
                
+               
                //add each leaf to terminate recursion in consensus
                //you want the leaves in there but with insignifigant sightings value so it is added last
                //for each leaf node get descendant info.
                for (int j = 0; j < numLeaves; j++) {
+               
+                       if (m->control_pressed) { return 1; }
                        
                        //only need the first one since leaves have no descendants but themselves
                        it = t[0]->tree[j].pcount.begin(); 
@@ -238,33 +289,72 @@ void ConcensusCommand::getSets() {
                }
 
                sort(treeSet.begin(), treeSet.end());
+               
+               
+               map< vector<string>, int> nodePairsCopy = nodePairs;
+               
+               
+               //set initial rating on pairs to sightings + subgroup sightings
+               while (nodePairsCopy.size() != 0) {
+                       if (m->control_pressed) { return 1; }
+               
+                       vector<string> smallOne = getSmallest(nodePairsCopy);
+                       
+                       int subgrouprate = getSubgroupRating(smallOne);
+               
+                       nodePairsInitialRate[smallOne] = nodePairs[smallOne] + subgrouprate;
+                       
+                       nodePairsCopy.erase(smallOne);
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ConcensusCommand", "getSets");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
+       try{
+               vector<string> smallest = nodes.begin()->first;
+               int smallsize = smallest.size();
+               
+               for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
+                       if(it2->first.size() < smallsize) { smallsize = it2->first.size();  smallest = it2->first;  }
+               }
+               
+               return smallest;
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "getSets");
+               m->errorOut(e, "ConcensusCommand", "getSmallest");
                exit(1);
        }
 }
 
 //**********************************************************************************************************************
-vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
+vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
        try {
+//cout << "new call " << endl << endl << endl;
                vector<string> largest; largest.clear();
-               int largestSighting = -1;
-               
-               //go through the sets
-               for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
-                       //are you a subset of bigset
-                       if (isSubset(bigset, it2->first) == true) {
-                       
-                               //are you the largest. if you are the same size as current largest refer to sighting
-                               if (it2->first.size() > largest.size()) { largest = it2->first;  largestSighting = it2->second; }
-                               else if (it2->first.size() == largest.size()) {
-                                       if (it2->second > largestSighting) { largest = it2->first;  largestSighting = it2->second; }
-                               }
-                               
-                       }
+               rest.clear();
+               
+               //if you are just 2 groups
+               if (bigset.size() == 2)  {   
+                       rest.push_back(bigset[0]);
+                       largest.push_back(bigset[1]);
+               }else{
+                       rest = bestSplit[bigset][0];
+                       largest = bestSplit[bigset][1];
                }
                
+               
+               //save for printing out later and for branch lengths
+               nodePairsInTree[rest] = nodePairs[rest];
+               
+               //delete whatever set you return because it is no longer available
+               nodePairs.erase(rest);
+
                //save for printing out later and for branch lengths
                nodePairsInTree[largest] = nodePairs[largest];
                
@@ -275,7 +365,74 @@ vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
        
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "getNextAvailableSet");
+               m->errorOut(e, "ConcensusCommand", "getNextAvailableSet");
+               exit(1);
+       }
+}
+
+/**********************************************************************************************************************/
+int ConcensusCommand::getSubgroupRating(vector<string> group) {
+       try {
+               map< vector<string>, int>::iterator ittemp;
+               map< vector< vector<string> > , int >::iterator it3;
+               int rate = 0;
+               
+               // ***********************************************************************************//
+               //1. this function must be called passing it littlest sets to biggest 
+               //              since it the rating is made from your sighting plus you best splits rating
+               //2. it saves the top pair to use later
+               // ***********************************************************************************//
+
+               
+               if (group.size() < 3) {  return rate;  }
+               
+               map< vector<string>, int> possiblePairing;  //this is all the subsets of group
+               
+               //go through the sets
+               for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+                       //are you a subset of bigset, then save in possiblePairings
+                       if (isSubset(group, it2->first) == true) {  possiblePairing[it2->first] = it2->second;  }
+               }               
+       
+               map< vector< vector<string> > , int > rating;
+               
+               while (possiblePairing.size() != 0) {
+               
+                       it2 = possiblePairing.begin(); 
+                       vector<string> temprest = getRestSet(group, it2->first);
+                       
+                       //is the rest a set available in possiblePairings
+                       ittemp = possiblePairing.find(temprest);
+                       if (ittemp != possiblePairing.end()) {  //if the rest is in the possible pairings then add this pair to rating map
+                               vector< vector<string> > temprate;
+                               temprate.push_back(it2->first);  temprate.push_back(temprest);
+                               
+                               rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
+                               
+                               //erase so you dont add 1,2 and 2,1.
+                               possiblePairing.erase(temprest);
+                       }
+                       
+                       possiblePairing.erase(it2);
+               }
+
+
+               it3 = rating.begin();
+               rate = it3->second;
+               vector< vector<string> > topPair = it3->first;
+               
+               //choose the split with the best rating
+               for (it3 = rating.begin(); it3 != rating.end(); it3++) {
+                       
+                       if (it3->second > rate) {  rate = it3->second;  topPair = it3->first;  }
+               }
+               
+               bestSplit[group] = topPair;
+               
+               return rate;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ConcensusCommand", "getSubgroupRating");
                exit(1);
        }
 }
@@ -294,18 +451,12 @@ vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string
                        //its not in the subset so put it in the rest
                        if (inSubset == false) { rest.push_back(bigset[i]); }
                }
-               
-               //save for printing out later and for branch lengths
-               nodePairsInTree[rest] = nodePairs[rest];
-               
-               //delete whatever set you return because it is no longer available
-               nodePairs.erase(rest);
 
                return rest;
        
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "getRestSet");
+               m->errorOut(e, "ConcensusCommand", "getRestSet");
                exit(1);
        }
 }
@@ -314,6 +465,9 @@ vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string
 bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
        try {
                
+       
+               if (subset.size() > bigset.size()) { return false;  }
+               
                //check if each guy in suset is also in bigset
                for (int i = 0; i < subset.size(); i++) {
                        bool match = false;
@@ -329,7 +483,7 @@ bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
        
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "isSubset");
+               m->errorOut(e, "ConcensusCommand", "isSubset");
                exit(1);
        }
 }
@@ -347,7 +501,7 @@ int ConcensusCommand::findSpot(string node) {
        
        }
        catch(exception& e) {
-               errorOut(e, "ConcensusCommand", "findSpot");
+               m->errorOut(e, "ConcensusCommand", "findSpot");
                exit(1);
        }
 }