]> git.donarmstrong.com Git - mothur.git/blobdiff - treegroupscommand.cpp
working on pam
[mothur.git] / treegroupscommand.cpp
index bba6289ada54856c25d683126b0d181badab4c15..753b5cbe3cf7f67ed623bb8d866b848c6c96e5be 100644 (file)
 //**********************************************************************************************************************
 vector<string> TreeGroupCommand::setParameters(){      
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
-               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "countcolumn",false,false); parameters.push_back(pcount);
-        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName-countcolumn",false,false); parameters.push_back(pcolumn);             
-        CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-        CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
-        CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);                
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "",true,false); parameters.push_back(pcalc);
+               CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","tree",false,false,true); parameters.push_back(pshared);
+               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none","tree",false,false); parameters.push_back(pphylip);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","",false,false); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "countcolumn","",false,false); parameters.push_back(pcount);
+        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName-countcolumn","tree",false,false); parameters.push_back(pcolumn);              
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);             
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson", "jclass-thetayc", "", "", "","",true,false,true); parameters.push_back(pcalc);
                
-        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
 //CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -71,24 +71,19 @@ string TreeGroupCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string TreeGroupCommand::getOutputFileNameTag(string type, string inputName=""){       
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string TreeGroupCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "tree")            {   outputFileName =  "tre";   }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TreeGroupCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "tree") {  pattern = "[filename],[calc],[distance],[tag],tre-[filename],tre"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TreeGroupCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 //**********************************************************************************************************************
 TreeGroupCommand::TreeGroupCommand(){  
@@ -434,7 +429,10 @@ int TreeGroupCommand::execute(){
                        m->Treenames.clear();
                        
                        //fills globaldatas tree names
-                       m->Treenames = m->getGroups();
+                       //m->Treenames = m->getGroups();
+            for (int k = 0; k < lookup.size(); k++) {
+                m->Treenames.push_back(lookup[k]->getGroup());
+            }
                
                        if (m->control_pressed) { return 0; }
                        
@@ -453,19 +451,25 @@ int TreeGroupCommand::execute(){
                        readMatrix->setCutoff(cutoff);
        
             ct = NULL;
+            nameMap = NULL;
             if(namefile != ""){        
                 nameMap = new NameAssignment(namefile);
                 nameMap->readMap();
                 readMatrix->read(nameMap);
             }else if (countfile != "") {
                 ct = new CountTable();
-                ct->readTable(countfile);
+                ct->readTable(countfile, true, false);
                 readMatrix->read(ct);
+            }else {
+                readMatrix->read(nameMap);
             }
 
                        list = readMatrix->getListVector();
                        SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
-
+            
+            //clear globaldatas old tree names if any
+                       m->Treenames.clear();
+            
                        //make treemap
             if (ct != NULL) { delete ct; }
                        ct = new CountTable();
@@ -477,17 +481,12 @@ int TreeGroupCommand::execute(){
                 nameMap.insert(bin); 
                 gps.insert(bin); 
                 groupMap[bin] = bin;
+                m->Treenames.push_back(bin);
             }
             ct->createTable(nameMap, groupMap, gps);
                        
                        vector<string> namesGroups = ct->getNamesOfGroups();
                        m->setGroups(namesGroups);
-               
-                       //clear globaldatas old tree names if any
-                       m->Treenames.clear();
-            
-                       //fills globaldatas tree names
-                       m->Treenames = m->getGroups();
                        
                        //used in tree constructor 
                        m->runParse = false;
@@ -501,7 +500,9 @@ int TreeGroupCommand::execute(){
                        if (m->control_pressed) { return 0; }
 
                        //create a new filename
-                       string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + getOutputFileNameTag("tree");     
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+                       string outputFile = getOutputFileName("tree",variables);        
                        outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
                                
                        Tree* newTree = createTree(matrix);
@@ -643,6 +644,7 @@ int TreeGroupCommand::makeSimsShared() {
             }else {
                 m->clearGroups();
                 Groups.clear();
+                m->Treenames.clear();
                 vector<SharedRAbundVector*> temp;
                 for (int i = 0; i < lookup.size(); i++) {
                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
@@ -651,6 +653,7 @@ int TreeGroupCommand::makeSimsShared() {
                     }else { 
                         Groups.push_back(lookup[i]->getGroup()); 
                         temp.push_back(lookup[i]);
+                        m->Treenames.push_back(lookup[i]->getGroup());
                     }
                 } 
                 lookup = temp;
@@ -659,8 +662,18 @@ int TreeGroupCommand::makeSimsShared() {
             
             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
         }
-        
         numGroups = lookup.size();
+        
+        //sanity check to make sure processors < numComparisions
+        int numDists = 0;
+        for(int i=0;i<numGroups;i++){
+            for(int j=0;j<i;j++){
+                numDists++;
+                if (numDists > processors) { break; }
+            }
+        }
+        if (numDists < processors) { processors = numDists; }
+        
                lines.resize(processors);
                for (int i = 0; i < processors; i++) {
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
@@ -889,6 +902,9 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 
                 //Close all thread handles and free memory allocations.
                 for(int i=0; i < pDataArray.size(); i++){
+                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                    }
                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
                     
                     for (int k = 0; k < calcDists.size(); k++) {
@@ -912,35 +928,17 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 thisItersLookup.clear();
                 for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
             }
+            
+            if (m->debug) {  m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
                }
-               
+        
+               if (m->debug) {  m->mothurOut("[DEBUG]: done with iters.\n"); }
+            
         if (iters != 1) {
             //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);  
             
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(treeCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                    }
-                }
-            }
-            
-            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].dist /= (float) iters;
-                }
-            }
+            if (m->debug) {  m->mothurOut("[DEBUG]: found averages.\n"); }
             
             //create average tree for each calc
             for (int i = 0; i < calcDists.size(); i++) {
@@ -958,7 +956,12 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".ave." + getOutputFileNameTag("tree");                                
+                map<string, string> variables; 
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+                variables["[calc]"] = treeCalculators[i]->getName();
+                variables["[distance]"] = thisLookup[0]->getLabel();
+                variables["[tag]"] = "ave";
+                string outputFile = getOutputFileName("tree",variables);                               
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 //creates tree from similarity matrix and write out file
@@ -966,12 +969,20 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 if (newTree != NULL) { writeTree(outputFile, newTree); }                
             }
             
+            if (m->debug) {  m->mothurOut("[DEBUG]: done averages trees.\n"); }
+            
             //create all trees for each calc and find their consensus tree
             for (int i = 0; i < calcDists.size(); i++) {
                 if (m->control_pressed) { break; }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".all." + getOutputFileNameTag("tree");                                
+                //create a new filename
+                map<string, string> variables; 
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+                variables["[calc]"] = treeCalculators[i]->getName();
+                variables["[distance]"] = thisLookup[0]->getLabel();
+                variables["[tag]"] = "all";
+                string outputFile = getOutputFileName("tree",variables);                               
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 ofstream outAll;
@@ -991,7 +1002,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                         int row = calcDistsTotals[myIter][i][j].seq1;
                         int column = calcDistsTotals[myIter][i][j].seq2;
                         double dist = calcDistsTotals[myIter][i][j].dist;
-                        
+                       
                         matrix[row][column] = dist;
                         matrix[column][row] = dist;
                     }
@@ -1006,13 +1017,18 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 outAll.close();
                 if (m->control_pressed) { for (int k = 0; k < trees.size(); k++) { delete trees[k]; } }
                 
+                if (m->debug) {  m->mothurOut("[DEBUG]: done all trees.\n"); }
+                
                 Consensus consensus;
                 //clear old tree names if any
                 m->Treenames.clear(); m->Treenames = m->getGroups(); //may have changed if subsample eliminated groups
                 Tree* conTree = consensus.getTree(trees);
                 
+                if (m->debug) {  m->mothurOut("[DEBUG]: done cons tree.\n"); }
+                
                 //create a new filename
-                string conFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".cons." + getOutputFileNameTag("tree");                          
+                variables["[tag]"] = "cons";
+                string conFile = getOutputFileName("tree",variables);                          
                 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
                 ofstream outTree;
                 m->openOutputFile(conFile, outTree);
@@ -1040,7 +1056,12 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                 }
                 
                 //create a new filename
-                string outputFile = outputDir + m->getRootName(m->getSimpleName(inputfile)) + treeCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + getOutputFileNameTag("tree");                            
+                map<string, string> variables; 
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+                variables["[calc]"] = treeCalculators[i]->getName();
+                variables["[distance]"] = thisLookup[0]->getLabel();
+                variables["[tag]"] = "";
+                string outputFile = getOutputFileName("tree",variables);                                       
                 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
                 
                 //creates tree from similarity matrix and write out file