]> git.donarmstrong.com Git - mothur.git/blobdiff - treegroupscommand.cpp
fixed multiple bugs for 1.4 release
[mothur.git] / treegroupscommand.cpp
index 43e39f52d6447384133c1c566bfa1fcfe9c7179e..a21c47afcb1df45fc858c4877c43cc7fe9a586e2 100644 (file)
@@ -27,30 +27,32 @@ TreeGroupCommand::TreeGroupCommand(){
                globaldata = GlobalData::getInstance();
                format = globaldata->getFormat();
                validCalculator = new ValidCalculators();
-                       
-               int i;
-               for (i=0; i<globaldata->Estimators.size(); i++) {
-                       if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { 
-                               if (globaldata->Estimators[i] == "jabund") {    
-                                       treeCalculators.push_back(new JAbund());
-                               }else if (globaldata->Estimators[i] == "sorabund") { 
-                                       treeCalculators.push_back(new SorAbund());
-                               }else if (globaldata->Estimators[i] == "jclass") { 
-                                       treeCalculators.push_back(new Jclass());
-                               }else if (globaldata->Estimators[i] == "sorclass") { 
-                                       treeCalculators.push_back(new SorClass());
-                               }else if (globaldata->Estimators[i] == "jest") { 
-                                       treeCalculators.push_back(new Jest());
-                               }else if (globaldata->Estimators[i] == "sorest") { 
-                                       treeCalculators.push_back(new SorEst());
-                               }else if (globaldata->Estimators[i] == "thetayc") { 
-                                       treeCalculators.push_back(new ThetaYC());
-                               }else if (globaldata->Estimators[i] == "thetan") { 
-                                       treeCalculators.push_back(new ThetaN());
-                               }else if (globaldata->Estimators[i] == "morisitahorn") { 
-                                       treeCalculators.push_back(new MorHorn());
-                               }else if (globaldata->Estimators[i] == "braycurtis") { 
-                                       treeCalculators.push_back(new BrayCurtis());
+               
+               if (format == "sharedfile") {
+                       int i;
+                       for (i=0; i<globaldata->Estimators.size(); i++) {
+                               if (validCalculator->isValidCalculator("treegroup", globaldata->Estimators[i]) == true) { 
+                                       if (globaldata->Estimators[i] == "jabund") {    
+                                               treeCalculators.push_back(new JAbund());
+                                       }else if (globaldata->Estimators[i] == "sorabund") { 
+                                               treeCalculators.push_back(new SorAbund());
+                                       }else if (globaldata->Estimators[i] == "jclass") { 
+                                               treeCalculators.push_back(new Jclass());
+                                       }else if (globaldata->Estimators[i] == "sorclass") { 
+                                               treeCalculators.push_back(new SorClass());
+                                       }else if (globaldata->Estimators[i] == "jest") { 
+                                               treeCalculators.push_back(new Jest());
+                                       }else if (globaldata->Estimators[i] == "sorest") { 
+                                               treeCalculators.push_back(new SorEst());
+                                       }else if (globaldata->Estimators[i] == "thetayc") { 
+                                               treeCalculators.push_back(new ThetaYC());
+                                       }else if (globaldata->Estimators[i] == "thetan") { 
+                                               treeCalculators.push_back(new ThetaN());
+                                       }else if (globaldata->Estimators[i] == "morisitahorn") { 
+                                               treeCalculators.push_back(new MorHorn());
+                                       }else if (globaldata->Estimators[i] == "braycurtis") { 
+                                               treeCalculators.push_back(new BrayCurtis());
+                                       }
                                }
                        }
                }
@@ -72,100 +74,81 @@ TreeGroupCommand::TreeGroupCommand(){
 
 TreeGroupCommand::~TreeGroupCommand(){
        delete input;
-       delete read;
+       if (format == "sharedfile") {delete read;}
+       else { delete readMatrix;  delete matrix; delete list; }
+       delete tmap;
+       
 }
 
 //**********************************************************************************************************************
 
 int TreeGroupCommand::execute(){
        try {
-               int count = 1;  
-               EstOutput data;
-               vector<SharedRAbundVector*> subset;
-               
-               //if the users entered no valid calculators don't execute command
-               if (treeCalculators.size() == 0) { return 0; }
+               if (format == "sharedfile") {
+                       //if the users entered no valid calculators don't execute command
+                       if (treeCalculators.size() == 0) { cout << "You have given no valid calculators." << endl; return 0; }
 
-               //you have groups
-               read = new ReadOTUFile(globaldata->inputFileName);      
-               read->read(&*globaldata); 
+                       //you have groups
+                       read = new ReadOTUFile(globaldata->inputFileName);      
+                       read->read(&*globaldata); 
                        
-               input = globaldata->ginput;
-               lookup = input->getSharedRAbundVectors();
-                               
-               if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0; }
-               
-               numGroups = globaldata->Groups.size();
-               groupNames = "";
-               for (int i = 0; i < numGroups; i++) { groupNames += globaldata->Groups[i]; }
-               
-               //clear globaldatas old tree names if any
-               globaldata->Treenames.clear();
-               
-               //fills globaldatas tree names
-               globaldata->Treenames = globaldata->Groups;
-               
-               //create treemap class from groupmap for tree class to use
-               tmap = new TreeMap();
-               tmap->makeSim(globaldata->gGroupmap);
-               globaldata->gTreemap = tmap;
+                       input = globaldata->ginput;
+                       lookup = input->getSharedRAbundVectors();
                        
-               while(lookup[0] != NULL){
+                       if (lookup.size() < 2) { cout << "You have not provided enough valid groups.  I cannot run the command." << endl; return 0; }
                
-                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
-                               
-                               cout << lookup[0]->getLabel() << '\t' << count << endl;
-                               
-                               //for each calculator                                                                                           
-                               for(int i = 0 ; i < treeCalculators.size(); i++) {
-                                       
-                                       //initialize simMatrix
-                                       simMatrix.clear();
-                                       simMatrix.resize(numGroups);
-                                       for (int m = 0; m < simMatrix.size(); m++)      {
-                                               for (int j = 0; j < simMatrix.size(); j++)      {
-                                                       simMatrix[m].push_back(0.0);
-                                               }
-                                       }
+                       //create tree file
+                       makeSimsShared();
+               }else{
+                       //read in dist file
+                       filename = globaldata->inputFileName;
+               
+                       if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
+                       else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
                                
-                                       //initialize index
-                                       index.clear();
-                                       for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
+                       if(globaldata->getPrecision() != ""){
+                               convert(globaldata->getPrecision(), precision); 
+                       }
                
-                                       //create a new filename
-                                       outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";
-                                       
-                                                                                               
-                                       for (int k = 0; k < lookup.size(); k++) { 
-                                               for (int l = k; l < lookup.size(); l++) {
-                                                       if (k != l) { //we dont need to similiarity of a groups to itself
-                                                               //get estimated similarity between 2 groups
-                                                               
-                                                               subset.clear(); //clear out old pair of sharedrabunds
-                                                               //add new pair of sharedrabunds
-                                                               subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
-                                                               
-                                                               data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
-                                                               //save values in similarity matrix
-                                                               simMatrix[k][l] = data[0];
-                                                               simMatrix[l][k] = data[0];
-                                                       }
-                                               }
-                                       }
-                                       
-                                       //creates tree from similarity matrix and write out file
-                                       createTree();
-                               }
+                       if(globaldata->getCutOff() != ""){
+                               convert(globaldata->getCutOff(), cutoff);       
+                               cutoff += (5 / (precision * 10.0));
                        }
+                       readMatrix->setCutoff(cutoff);
+       
+                       if(globaldata->getNameFile() != ""){    
+                               nameMap = new NameAssignment(globaldata->getNameFile());
+                               nameMap->readMap(1,2);
+                       }
+                       else{
+                               nameMap = NULL;
+                       }
+       
+                       readMatrix->read(nameMap);
+                       list = readMatrix->getListVector();
+                       matrix = readMatrix->getMatrix();
+
+                       //make treemap
+                       tmap = new TreeMap();
+                       tmap->makeSim(list);
+                       globaldata->gTreemap = tmap;
                        
-                       //prevent memory leak
-                       for (int i = 0; i < lookup.size(); i++) {       delete lookup[i];       }
-                       
-                       //get next line to process
-                       lookup = input->getSharedRAbundVectors();
-                       count++;
-               }
+                       globaldata->Groups = tmap->namesOfGroups;
+               
+                       //clear globaldatas old tree names if any
+                       globaldata->Treenames.clear();
                
+                       //fills globaldatas tree names
+                       globaldata->Treenames = globaldata->Groups;
+
+                       makeSimsDist();
+
+                       //create a new filename
+                       outputFile = getRootName(globaldata->inputFileName) + "tre";    
+                               
+                       createTree();
+               }
+                               
                //reset groups parameter
                globaldata->Groups.clear();  globaldata->setGroups("");
 
@@ -190,8 +173,8 @@ void TreeGroupCommand::createTree(){
                //do merges and create tree structure by setting parents and children
                //there are numGroups - 1 merges to do
                for (int i = 0; i < (numGroups - 1); i++) {
-                       
-                       float largest = 0.0;
+                       float largest = -1000.0;
+
                        int row, column;
                        //find largest value in sims matrix by searching lower triangle
                        for (int j = 1; j < simMatrix.size(); j++) {
@@ -223,9 +206,9 @@ void TreeGroupCommand::createTree(){
                        index[row] = numGroups+i;
                        index[column] = numGroups+i;
                        
-                       //zero out highest value that caused the merge.
-                       simMatrix[row][column] = 0.0;
-                       simMatrix[column][row] = 0.0;
+                       //remove highest value that caused the merge.
+                       simMatrix[row][column] = -1000.0;
+                       simMatrix[column][row] = -1000.0;
                        
                        //merge values in simsMatrix
                        for (int n = 0; n < simMatrix.size(); n++)      {
@@ -233,11 +216,15 @@ void TreeGroupCommand::createTree(){
                                simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
                                simMatrix[n][row] = simMatrix[row][n];
                                //delete column
-                               simMatrix[column][n] = 0.0;
-                               simMatrix[n][column] = 0.0;
+                               simMatrix[column][n] = -1000.0;
+                               simMatrix[n][column] = -1000.0;
                        }
                }
-       
+               
+               //adjust tree to make sure root to tip length is .5
+               int root = t->findRoot();
+               t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
+               
                //assemble tree
                t->assembleTree();
                
@@ -262,13 +249,13 @@ void TreeGroupCommand::printSims(ostream& out) {
        try {
                
                //output column headers
-               out << '\t';
-               for (int i = 0; i < lookup.size(); i++) {       out << lookup[i]->getGroup() << '\t';           }
-               out << endl;
+               //out << '\t';
+               //for (int i = 0; i < lookup.size(); i++) {     out << lookup[i]->getGroup() << '\t';           }
+               //out << endl;
                
                
                for (int m = 0; m < simMatrix.size(); m++)      {
-                       out << lookup[m]->getGroup() << '\t';
+                       //out << lookup[m]->getGroup() << '\t';
                        for (int n = 0; n < simMatrix.size(); n++)      {
                                out << simMatrix[m][n] << '\t'; 
                        }
@@ -286,5 +273,129 @@ void TreeGroupCommand::printSims(ostream& out) {
        }               
 }
 /***********************************************************/
+void TreeGroupCommand::makeSimsDist() {
+       try {
+               numGroups = list->size();
+               
+               //initialize index
+               index.clear();
+               for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
+               
+               //initialize simMatrix
+               simMatrix.clear();
+               simMatrix.resize(numGroups);
+               for (int m = 0; m < simMatrix.size(); m++)      {
+                       for (int j = 0; j < simMatrix.size(); j++)      {
+                               simMatrix[m].push_back(0.0);
+                       }
+               }
+               
+               //go through sparse matrix and fill sims
+               //go through each cell in the sparsematrix
+               for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
+                       //similairity = -(distance-1)
+                       simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
+                       simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);                           
+               }
+
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
+
+/***********************************************************/
+void TreeGroupCommand::makeSimsShared() {
+       try {
+               int count = 1;  
+               EstOutput data;
+               vector<SharedRAbundVector*> subset;
+       
+               numGroups = globaldata->Groups.size();
+               
+               //clear globaldatas old tree names if any
+               globaldata->Treenames.clear();
+               
+               //fills globaldatas tree names
+               globaldata->Treenames = globaldata->Groups;
+               
+               //create treemap class from groupmap for tree class to use
+               tmap = new TreeMap();
+               tmap->makeSim(globaldata->gGroupmap);
+               globaldata->gTreemap = tmap;
+               
+               while(lookup[0] != NULL){
+               
+                       if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(lookup[0]->getLabel()) == 1){                   
+                               
+                               cout << lookup[0]->getLabel() << '\t' << count << endl;
+                               
+                               //for each calculator                                                                                           
+                               for(int i = 0 ; i < treeCalculators.size(); i++) {
+                                       //initialize simMatrix
+                                       simMatrix.clear();
+                                       simMatrix.resize(numGroups);
+                                       for (int m = 0; m < simMatrix.size(); m++)      {
+                                               for (int j = 0; j < simMatrix.size(); j++)      {
+                                                       simMatrix[m].push_back(0.0);
+                                               }
+                                       }
+               
+                                       //initialize index
+                                       index.clear();
+                                       for (int g = 0; g < numGroups; g++) {   index[g] = g;   }
+               
+                                       //create a new filename
+                                       outputFile = getRootName(globaldata->inputFileName) + treeCalculators[i]->getName() + "." + lookup[0]->getLabel() + ".tre";                             
+                                                                                               
+                                       for (int k = 0; k < lookup.size(); k++) { 
+                                               for (int l = k; l < lookup.size(); l++) {
+                                                       if (k != l) { //we dont need to similiarity of a groups to itself
+                                                               //get estimated similarity between 2 groups
+                                                               
+                                                               subset.clear(); //clear out old pair of sharedrabunds
+                                                               //add new pair of sharedrabunds
+                                                               subset.push_back(lookup[k]); subset.push_back(lookup[l]); 
+                                                               
+                                                               data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
+                                                               //save values in similarity matrix
+                                                               simMatrix[k][l] = data[0];
+                                                               simMatrix[l][k] = data[0];
+                                                       }
+                                               }
+                                       }
+                                       
+                                       //creates tree from similarity matrix and write out file
+                                       createTree();
+                               }
+                       }
+                       
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {       delete lookup[i];       }
+                       
+                       //get next line to process
+                       lookup = input->getSharedRAbundVectors();
+                       count++;
+               }
+               
+               for(int i = 0 ; i < treeCalculators.size(); i++) {  delete treeCalculators[i]; }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the TreeGroupCommand class Function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the TreeGroupCommand class function makeSimsShared. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }               
+}
+
+/***********************************************************/