git.donarmstrong.com Git - mothur.git/blobdiff - treegroupscommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / treegroupscommand.cpp
index 4a77211d2bd5e8d90585a9f19e6383db6e5f16fc..fb4887c73da88c0c631895e82f05090e97645761 100644 (file)
@@ -27,7 +27,7 @@ vector<string> TreeGroupCommand::setParameters(){
                CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
                
         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-        CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
+//CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -70,6 +70,26 @@ string TreeGroupCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string TreeGroupCommand::getOutputFileNameTag(string type, string inputName=""){ // returns the file-name tag for output type "type" ("" on failure); inputName defaults to "" and is not read in this body
+       try {
+        string outputFileName = ""; // stays empty unless the type is matched below
+               map<string, vector<string> >::iterator it;
+        //look the requested type up in outputTypes
+        //is this a type this command creates? if not, only an error is logged and "" is returned
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "tree")            {   outputFileName =  "tre";   } // "tree" is the only type given a tag here
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  } // type is in outputTypes but has no tag: log and set control_pressed
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TreeGroupCommand", "getOutputFileNameTag");
+               exit(1); // the exception is reported, then the process exits with status 1
+       }
+}
+//**********************************************************************************************************************
 TreeGroupCommand::TreeGroupCommand(){  
        try {
                abort = true; calledHelp = true;
@@ -266,8 +286,8 @@ TreeGroupCommand::TreeGroupCommand(string option)  {
 TreeGroupCommand::~TreeGroupCommand(){
        if (abort == false) {
                if (format == "sharedfile") {  delete input; }
-               else { delete readMatrix;  delete matrix; delete list; }
-               delete tmap;  
+               else { delete list; }
+               delete ct;  
        }
        
 }
@@ -380,8 +400,16 @@ int TreeGroupCommand::execute(){
                        m->runParse = false;
                        
                        //create treemap class from groupmap for tree class to use
-                       tmap = new TreeMap();
-                       tmap->makeSim(m->getAllGroups());
+                       ct = new CountTable();
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < m->getAllGroups().size(); i++) { 
+                nameMap.insert(m->getAllGroups()[i]); 
+                gps.insert(m->getAllGroups()[i]); 
+                groupMap[m->getAllGroups()[i]] = m->getAllGroups()[i];
+            }
+            ct->createTable(nameMap, groupMap, gps);
                        
                        //clear globaldatas old tree names if any
                        m->Treenames.clear();
@@ -398,7 +426,8 @@ int TreeGroupCommand::execute(){
                }else{
                        //read in dist file
                        filename = inputfile;
-               
+            
+            ReadMatrix* readMatrix;
                        if (format == "column") { readMatrix = new ReadColumnMatrix(filename); }        
                        else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); }
                                
@@ -408,22 +437,26 @@ int TreeGroupCommand::execute(){
                                nameMap = new NameAssignment(namefile);
                                nameMap->readMap();
                        }
-                       else{
-                               nameMap = NULL;
-                       }
+                       else{ nameMap = NULL; }
        
                        readMatrix->read(nameMap);
                        list = readMatrix->getListVector();
-                       matrix = readMatrix->getMatrix();
+                       SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
 
                        //make treemap
-                       tmap = new TreeMap();
-                       
-                       if (m->control_pressed) { return 0; }
-                       
-                       tmap->makeSim(list);
+                       ct = new CountTable();
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < list->getNumBins(); i++) {
+                string bin = list->get(i);
+                nameMap.insert(bin); 
+                gps.insert(bin); 
+                groupMap[bin] = bin;
+            }
+            ct->createTable(nameMap, groupMap, gps);
                        
-                       vector<string> namesGroups = tmap->getNamesOfGroups();
+                       vector<string> namesGroups = ct->getNamesOfGroups();
                        m->setGroups(namesGroups);
                
                        //clear globaldatas old tree names if any
@@ -437,12 +470,14 @@ int TreeGroupCommand::execute(){
                        
                        if (m->control_pressed) { return 0; }
                        
-                       vector< vector<double> > matrix = makeSimsDist();
+                       vector< vector<double> > matrix = makeSimsDist(dMatrix);
+            delete readMatrix;
+            delete dMatrix;
                        
                        if (m->control_pressed) { return 0; }
 
                        //create a new filename
-                       string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "tre";    
+                       string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("tree");     
                        outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                                
                        Tree* newTree = createTree(matrix);
@@ -482,76 +517,14 @@ int TreeGroupCommand::execute(){
 Tree* TreeGroupCommand::createTree(vector< vector<double> >& simMatrix){
        try {
                //create tree
-               t = new Tree(tmap, simMatrix);
+               t = new Tree(ct, simMatrix);
         
-       /* //initialize index
-        map<int, int> index;  //maps row in simMatrix to vector index in the tree
-        for (int g = 0; g < numGroups; g++) {  index[g] = g;   }
+        if (m->control_pressed) { delete t; t = NULL; return t; }
                
-               //do merges and create tree structure by setting parents and children
-               //there are numGroups - 1 merges to do
-               for (int i = 0; i < (numGroups - 1); i++) {
-                       float largest = -1000.0;
-                       
-                       if (m->control_pressed) { delete t; t = NULL; return t; }
-                       
-                       int row, column;
-                       //find largest value in sims matrix by searching lower triangle
-                       for (int j = 1; j < simMatrix.size(); j++) {
-                               for (int k = 0; k < j; k++) {
-                                       if (simMatrix[j][k] > largest) {  largest = simMatrix[j][k]; row = j; column = k;  }
-                               }
-                       }
-
-                       //set non-leaf node info and update leaves to know their parents
-                       //non-leaf
-                       t->tree[numGroups + i].setChildren(index[row], index[column]);
-                       
-                       //parents
-                       t->tree[index[row]].setParent(numGroups + i);
-                       t->tree[index[column]].setParent(numGroups + i);
-                       
-                       //blength = distance / 2;
-                       float blength = ((1.0 - largest) / 2);
-                       
-                       //branchlengths
-                       t->tree[index[row]].setBranchLength(blength - t->tree[index[row]].getLengthToLeaves());
-                       t->tree[index[column]].setBranchLength(blength - t->tree[index[column]].getLengthToLeaves());
-                       
-                       //set your length to leaves to your childs length plus branchlength
-                       t->tree[numGroups + i].setLengthToLeaves(t->tree[index[row]].getLengthToLeaves() + t->tree[index[row]].getBranchLength());
-                       
-                       
-                       //update index 
-                       index[row] = numGroups+i;
-                       index[column] = numGroups+i;
-                       
-                       //remove highest value that caused the merge.
-                       simMatrix[row][column] = -1000.0;
-                       simMatrix[column][row] = -1000.0;
-                       
-                       //merge values in simsMatrix
-                       for (int n = 0; n < simMatrix.size(); n++)      {
-                               //row becomes merge of 2 groups
-                               simMatrix[row][n] = (simMatrix[row][n] + simMatrix[column][n]) / 2;
-                               simMatrix[n][row] = simMatrix[row][n];
-                               //delete column
-                               simMatrix[column][n] = -1000.0;
-                               simMatrix[n][column] = -1000.0;
-                       }
-               }
-               
-               //adjust tree to make sure root to tip length is .5
-               int root = t->findRoot();
-               t->tree[root].setBranchLength((0.5 - t->tree[root].getLengthToLeaves()));
-               */
-               //assemble tree
+        //assemble tree
                t->assembleTree();
-               
-               if (m->control_pressed) { delete t; t = NULL; return t; }
-               
+
                return t;
-       
        }
        catch(exception& e) {
                m->errorOut(e, "TreeGroupCommand", "createTree");
@@ -595,7 +568,7 @@ void TreeGroupCommand::printSims(ostream& out, vector< vector<double> >& simMatr
        }
 }
 /***********************************************************/
-vector< vector<double> > TreeGroupCommand::makeSimsDist() {
+vector< vector<double> > TreeGroupCommand::makeSimsDist(SparseDistanceMatrix* matrix) {
        try {
                numGroups = list->size();
                
@@ -610,13 +583,17 @@ vector< vector<double> > TreeGroupCommand::makeSimsDist() {
                
                //go through sparse matrix and fill sims
                //go through each cell in the sparsematrix
-               for(MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++){
-                       //similairity = -(distance-1)
-                       simMatrix[currentCell->row][currentCell->column] = -(currentCell->dist -1.0);   
-                       simMatrix[currentCell->column][currentCell->row] = -(currentCell->dist -1.0);   
-                       
-                       if (m->control_pressed) { return simMatrix; }
+        for (int i = 0; i < matrix->seqVec.size(); i++) {
+            for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+                
+                //already checked everyone else in row
+                if (i < matrix->seqVec[i][j].index) {   
+                    simMatrix[i][matrix->seqVec[i][j].index] = -(matrix->seqVec[i][j].dist -1.0);      
+                    simMatrix[matrix->seqVec[i][j].index][i] = -(matrix->seqVec[i][j].dist -1.0);      
                        
+                    if (m->control_pressed) { return simMatrix; }
+                }
+            }
                }
 
                return simMatrix;
@@ -631,13 +608,6 @@ vector< vector<double> > TreeGroupCommand::makeSimsDist() {
 int TreeGroupCommand::makeSimsShared() {
        try {
         
-        numGroups = lookup.size();
-               lines.resize(processors);
-               for (int i = 0; i < processors; i++) {
-                       lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
-                       lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
-               }       
-        
         if (subsample) { 
             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
                 subsampleSize = lookup[0]->getNumSeqs();
@@ -662,8 +632,17 @@ int TreeGroupCommand::makeSimsShared() {
                 lookup = temp;
                 m->setGroups(Groups);
             }
+            
+            if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
         }
         
+        numGroups = lookup.size();
+               lines.resize(processors);
+               for (int i = 0; i < processors; i++) {
+                       lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
+                       lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
+               }       
+        
                set<string> processedLabels;
                set<string> userLabels = labels;
                
@@ -955,7 +934,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave.tre";                            
+                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave." + getOutputFileNameTag("tree");                                
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 //creates tree from similarity matrix and write out file
@@ -968,7 +947,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 if (m->control_pressed) { break; }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all.tre";                            
+                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all." + getOutputFileNameTag("tree");                                
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 ofstream outAll;
@@ -1006,10 +985,10 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 Consensus consensus;
                 //clear old tree names if any
                 m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
-                Tree* conTree = consensus.getTree(trees, tmap);
+                Tree* conTree = consensus.getTree(trees);
                 
                 //create a new filename
-                string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons.tre";                              
+                string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons." + getOutputFileNameTag("tree");                          
                 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
                 ofstream outTree;
                 m->openOutputFile(conFile, outTree);
@@ -1037,7 +1016,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".tre";                                
+                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag("tree");                            
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 //creates tree from similarity matrix and write out file