]> git.donarmstrong.com Git - mothur.git/blobdiff - phylodiversitycommand.cpp
changed random forest output filename
[mothur.git] / phylodiversitycommand.cpp
index b0c11f68ff26dad59634375c80c44ecf0dd0382b..339649c7dcc814ac409e5b8ec1dfd432b544e43d 100644 (file)
 vector<string> PhyloDiversityCommand::setParameters(){ 
        try {
 
-               CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
-               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
-               CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
-               CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
-               CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
-               CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+               CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy);
+               CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary);
+               CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect);
+               CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -63,26 +63,21 @@ string PhyloDiversityCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){  
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string PhyloDiversityCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "phylodiv") {  outputFileName =  "phylodiv"; }
-            else if (type == "rarefy") {  outputFileName =  "phylodiv.rarefaction"; }
-            else if (type == "summary") {  outputFileName =  "phylodiv.summary"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "phylodiv") {  pattern = "[filename],[tag],phylodiv"; } 
+        else if (type == "rarefy") {  pattern = "[filename],[tag],phylodiv.rarefaction"; } 
+        else if (type == "summary") {  pattern = "[filename],[tag],phylodiv.summary"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern");
+        exit(1);
+    }
 }
 
 //**********************************************************************************************************************
@@ -284,9 +279,12 @@ int PhyloDiversityCommand::execute(){
                        if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
                        
                        ofstream outSum, outRare, outCollect;
-                       string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("summary");
-                       string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("rarefy");
-                       string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("phylodiv");
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
+            variables["[tag]"] = toString(i+1);
+                       string outSumFile = getOutputFileName("summary",variables);
+                       string outRareFile = getOutputFileName("rarefy",variables);
+                       string outCollectFile = getOutputFileName("phylodiv",variables);
                        
                        if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
                        if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
@@ -301,6 +299,31 @@ int PhyloDiversityCommand::execute(){
                                        randomLeaf.push_back(j); 
                                }
                        }
+            
+           /* float sum = 0;
+            vector<float> sums; sums.resize(m->getGroups().size(), 0);
+            vector<float> sumsAboveRoot; sumsAboveRoot.resize(m->getGroups().size(), 0);
+            for (int j = 0; j < trees[i]->getNumNodes(); j++) { 
+                if (trees[i]->tree[j].getBranchLength() < 0) { cout << j << '\t' << trees[i]->tree[j].getName() << '\t' << trees[i]->tree[j].getBranchLength() << endl; }
+                
+                               sum += abs(trees[i]->tree[j].getBranchLength());
+                for (int k = 0; k < m->getGroups().size(); k++) {
+                    map<string, int>::iterator itGroup = trees[i]->tree[j].pcount.find(m->getGroups()[k]);
+                    if (itGroup != trees[i]->tree[j].pcount.end()) {  //this branch belongs to a group we care about
+                        if (j < rootForGroup[m->getGroups()[k]]) {
+                            sums[k] += abs(trees[i]->tree[j].getBranchLength());
+                        }else {
+                            sumsAboveRoot[k] += abs(trees[i]->tree[j].getBranchLength());
+                        }
+                    } 
+                }
+                       }
+            cout << sum << endl; //exit(1);
+            
+            for (int k = 0; k < m->getGroups().size(); k++) {
+                cout << m->getGroups()[k] << "root node = " << rootForGroup[m->getGroups()[k]] << "sum below root = " << sums[k] << "sum above root = " << sumsAboveRoot[k] << endl;
+            }
+             exit(1);  */ 
                        
                        numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
                        
@@ -339,34 +362,22 @@ int PhyloDiversityCommand::execute(){
                                if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
                        }
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-                               if(processors == 1){
-                                       driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                               }else{
-                                       if (rarefy) {
-                                               vector<int> procIters;
-                                               
-                                               int numItersPerProcessor = iters / processors;
-                                               
-                                               //divide iters between processes
-                                               for (int h = 0; h < processors; h++) {
-                                                       if(h == processors - 1){
-                                                               numItersPerProcessor = iters - h * numItersPerProcessor;
-                                                       }
-                                                       procIters.push_back(numItersPerProcessor);
-                                               }
-                                               
-                                               createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
-                                               
-                                       }else{ //no need to paralellize if you dont want to rarefy
-                                               driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                                       }
-                               }
-
-                       #else
-                               driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
-                       #endif
-
+            if (rarefy) {
+                vector<int> procIters;
+                int numItersPerProcessor = iters / processors;
+                
+                //divide iters between processes
+                for (int h = 0; h < processors; h++) {
+                    if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+                    procIters.push_back(numItersPerProcessor);
+                }
+                
+                createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
+                
+            }else{ //no need to paralellize if you dont want to rarefy
+                driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);     
+            }
+                               
                        if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
                }
                
@@ -392,12 +403,13 @@ int PhyloDiversityCommand::execute(){
 //**********************************************************************************************************************
 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
        try {
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
+        int process = 1;
                
                vector<int> processIDS;
                map< string, vector<float> >::iterator itSum;
-               
+
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               
                //loop through and create all the processes you want
                while (process != processors) {
                        int pid = fork();
@@ -466,6 +478,56 @@ int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map<
                        in.close();
                        m->mothurRemove(inTemp);
                }
+#else
+        
+        //fill in functions
+        vector<phylodivData*> pDataArray;
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1];
+        vector<CountTable*> cts;
+        vector<Tree*> trees;
+        map<string, int> rootForGroup = getRootForGroups(t);
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+            CountTable* copyCount = new CountTable();
+            copyCount->copy(ct);
+            Tree* copyTree = new Tree(copyCount);
+            copyTree->getCopy(t);
+            
+            cts.push_back(copyCount);
+            trees.push_back(copyTree);
+            
+            map<string, vector<float> > copydiv = div;
+            map<string, vector<float> > copysumDiv = sumDiv;
+            vector<int> copyrandomLeaf = randomLeaf;
+            set<int> copynumSampledList = numSampledList;
+            map<string, int> copyRootForGrouping = rootForGroup;
+            
+            phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping);
+                       pDataArray.push_back(temp);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+               }
+               
+               driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) {
+                for (int k = 0; k < (itSum->second).size(); k++) {
+                    sumDiv[itSum->first][k] += (itSum->second)[k];
+                }
+            }
+                       delete cts[i];
+            delete trees[i];
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
                
 #endif
 
@@ -571,6 +633,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
                        else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
                                
                        out << setprecision(4) << score << endl;
+            //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
                }
                                        
                out.close();
@@ -635,7 +698,7 @@ vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector<
         //you are a leaf
                if(t->tree[index].getBranchLength() != -1){     
                        for (int k = 0; k < groups.size(); k++) { 
-                               sums[k] += abs(t->tree[index].getBranchLength());       
+                sums[k] += abs(t->tree[index].getBranchLength());      
                        }
                }
         
@@ -686,10 +749,10 @@ map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
             
             for (int j = 0; j < groups.size(); j++) {
                 
-                if (done[groups[j]] == false) { //we haven't found the root for this group yet
-                    
-                    done[groups[j]] = true;
-                    roots[groups[j]] = i; //set root to self to start
+                    if (done[groups[j]] == false) { //we haven't found the root for this group yet, initialize it
+                        done[groups[j]] = true;
+                        roots[groups[j]] = i; //set root to self to start
+                    }
                     
                     //while you aren't at root
                     while(t->tree[index].getParent() != -1){
@@ -709,15 +772,17 @@ map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
                         if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
                         
                         if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
-                            roots[groups[j]] = index;
+                            if (index > roots[groups[j]]) {  roots[groups[j]] = index; }
                         }else { ;}
                         
                         index = t->tree[index].getParent();    
                     }
-                }
+                //}
             }
         }
         
+        
+        
         return roots;
         
        }