]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
added modify names parameter to set.dir
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){  
15         try {
16
17                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","phylodiv",false,true,true); parameters.push_back(ptree);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "","rarefy",false,false); parameters.push_back(prarefy);
26                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "","summary",false,false); parameters.push_back(psummary);
27                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcollect);
28                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pscale);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string PhyloDiversityCommand::getHelpString(){  
43         try {
44                 string helpString = "";
45                 helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
47                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
48                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
49                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
50                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
51                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
52                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
53                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
54                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
55                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
56                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 string PhyloDiversityCommand::getOutputPattern(string type) {
67     try {
68         string pattern = "";
69         
70         if (type == "phylodiv") {  pattern = "[filename],[tag],phylodiv"; } 
71         else if (type == "rarefy") {  pattern = "[filename],[tag],phylodiv.rarefaction"; } 
72         else if (type == "summary") {  pattern = "[filename],[tag],phylodiv.summary"; }
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "PhyloDiversityCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82
83 //**********************************************************************************************************************
84 PhyloDiversityCommand::PhyloDiversityCommand(){ 
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["phylodiv"] = tempOutNames;
90                 outputTypes["rarefy"] = tempOutNames;
91                 outputTypes["summary"] = tempOutNames;
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
95                 exit(1);
96         }
97 }
98 //**********************************************************************************************************************
99 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
100         try {
101                 abort = false; calledHelp = false;   
102                 
103                 //allow user to run help
104                 if(option == "help") { help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();;
109                         
110                         OptionParser parser(option);
111                         map<string,string> parameters = parser.getParameters();
112                         map<string,string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["phylodiv"] = tempOutNames;
124                         outputTypes["rarefy"] = tempOutNames;
125                         outputTypes["summary"] = tempOutNames;
126                         
127                         //if the user changes the input directory command factory will send this info to us in the output parameter 
128                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
129                         if (inputDir == "not found"){   inputDir = "";          }
130                         else {
131                                 string path;
132                                 it = parameters.find("tree");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
138                                 }
139                                 
140                                 it = parameters.find("group");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
146                                 }
147                                 
148                                 it = parameters.find("name");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
154                                 }
155                 
156                 it = parameters.find("count");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
162                                 }
163                         }
164                         
165                         //check for required parameters
166                         treefile = validParameter.validFile(parameters, "tree", true);
167                         if (treefile == "not open") { treefile = ""; abort = true; }
168                         else if (treefile == "not found") {                             
169                                 //if there is a current design file, use it
170                                 treefile = m->getTreeFile(); 
171                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
172                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
173                         }else { m->setTreeFile(treefile); }     
174                         
175                         //check for required parameters
176                         groupfile = validParameter.validFile(parameters, "group", true);
177                         if (groupfile == "not open") { groupfile = ""; abort = true; }
178                         else if (groupfile == "not found") { groupfile = ""; }
179                         else { m->setGroupFile(groupfile); }
180                         
181                         namefile = validParameter.validFile(parameters, "name", true);
182                         if (namefile == "not open") { namefile = ""; abort = true; }
183                         else if (namefile == "not found") { namefile = ""; }
184                         else { m->setNameFile(namefile); }
185                         
186             countfile = validParameter.validFile(parameters, "count", true);
187                         if (countfile == "not open") { countfile = ""; abort = true; }
188                         else if (countfile == "not found") { countfile = "";  } 
189                         else { m->setCountTableFile(countfile); }
190             
191             if ((namefile != "") && (countfile != "")) {
192                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
193             }
194                         
195             if ((groupfile != "") && (countfile != "")) {
196                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
197             }
198
199                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
200                         
201                         string temp;
202                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
203                         m->mothurConvert(temp, freq); 
204                         
205                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
206                         m->mothurConvert(temp, iters); 
207                         
208                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
209                         rarefy = m->isTrue(temp);
210                         if (!rarefy) { iters = 1;  }
211                         
212                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
213                         summary = m->isTrue(temp);
214                         
215                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
216                         scale = m->isTrue(temp);
217                         
218                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
219                         collect = m->isTrue(temp);
220                         
221                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
222                         m->setProcessors(temp);
223                         m->mothurConvert(temp, processors); 
224                         
225                         groups = validParameter.validFile(parameters, "groups", false);                 
226                         if (groups == "not found") { groups = "";  }
227                         else { 
228                                 m->splitAtDash(groups, Groups);
229                                 m->setGroups(Groups);
230                         }
231                         
232                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
233                         
234                         if (countfile=="") {
235                 if (namefile == "") {
236                     vector<string> files; files.push_back(treefile);
237                     parser.getNameFile(files);
238                 } 
239             }
240                 }
241                 
242         }
243         catch(exception& e) {
244                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
245                 exit(1);
246         }                       
247 }
248 //**********************************************************************************************************************
249
250 int PhyloDiversityCommand::execute(){
251         try {
252                 
253                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
254                 
255         int start = time(NULL);
256         
257                 m->setTreeFile(treefile);
258         TreeReader* reader;
259         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
260         else { reader = new TreeReader(treefile, countfile); }
261         vector<Tree*> trees = reader->getTrees();
262         ct = trees[0]->getCountTable();
263         delete reader;
264
265                 SharedUtil util;
266                 vector<string> mGroups = m->getGroups();
267                 vector<string> tGroups = ct->getNamesOfGroups();
268                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
269                 
270                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
271                 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
272                 m->setGroups(mGroups);
273                  
274                 vector<string> outputNames;
275                 
276                 //for each of the users trees
277                 for(int i = 0; i < trees.size(); i++) {
278                 
279                         if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
280                         
281                         ofstream outSum, outRare, outCollect;
282             map<string, string> variables; 
283             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
284             variables["[tag]"] = toString(i+1);
285                         string outSumFile = getOutputFileName("summary",variables);
286                         string outRareFile = getOutputFileName("rarefy",variables);
287                         string outCollectFile = getOutputFileName("phylodiv",variables);
288                         
289                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
290                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
291                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
292                         
293                         int numLeafNodes = trees[i]->getNumLeaves();
294                         
295                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
296                         vector<int> randomLeaf;
297                         for (int j = 0; j < numLeafNodes; j++) {  
298                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
299                                         randomLeaf.push_back(j); 
300                                 }
301                         }
302             
303            /* float sum = 0;
304             vector<float> sums; sums.resize(m->getGroups().size(), 0);
305             vector<float> sumsAboveRoot; sumsAboveRoot.resize(m->getGroups().size(), 0);
306             for (int j = 0; j < trees[i]->getNumNodes(); j++) { 
307                 if (trees[i]->tree[j].getBranchLength() < 0) { cout << j << '\t' << trees[i]->tree[j].getName() << '\t' << trees[i]->tree[j].getBranchLength() << endl; }
308                 
309                                 sum += abs(trees[i]->tree[j].getBranchLength());
310                 for (int k = 0; k < m->getGroups().size(); k++) {
311                     map<string, int>::iterator itGroup = trees[i]->tree[j].pcount.find(m->getGroups()[k]);
312                     if (itGroup != trees[i]->tree[j].pcount.end()) {  //this branch belongs to a group we care about
313                         if (j < rootForGroup[m->getGroups()[k]]) {
314                             sums[k] += abs(trees[i]->tree[j].getBranchLength());
315                         }else {
316                             sumsAboveRoot[k] += abs(trees[i]->tree[j].getBranchLength());
317                         }
318                     } 
319                 }
320                         }
321             cout << sum << endl; //exit(1);
322             
323             for (int k = 0; k < m->getGroups().size(); k++) {
324                 cout << m->getGroups()[k] << "root node = " << rootForGroup[m->getGroups()[k]] << "sum below root = " << sums[k] << "sum above root = " << sumsAboveRoot[k] << endl;
325             }
326              exit(1);  */ 
327                         
328                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
329                         
330                         //each group, each sampling, if no rarefy iters = 1;
331                         map<string, vector<float> > diversity;
332                         
333                         //each group, each sampling, if no rarefy iters = 1;
334                         map<string, vector<float> > sumDiversity;
335                         
336                         //find largest group total 
337                         int largestGroup = 0;
338                         for (int j = 0; j < mGroups.size(); j++) { 
339                 int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
340                                 if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
341                                 
342                                 //initialize diversity
343                                 diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);          //numSampled
344                                                                                                                                                                                                                         //groupA                0.0                     0.0
345                                                                                                                                                                                                                         
346                                 //initialize sumDiversity
347                                 sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
348                         }       
349
350                         //convert freq percentage to number
351                         int increment = 100;
352                         if (freq < 1.0) {  increment = largestGroup * freq;  
353                         }else { increment = freq;  }
354                         
355                         //initialize sampling spots
356                         set<int> numSampledList;
357                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
358                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
359                         
360                         //add other groups ending points
361                         for (int j = 0; j < mGroups.size(); j++) {  
362                                 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
363                         }
364                         
365             if (rarefy) {
366                 vector<int> procIters;
367                 int numItersPerProcessor = iters / processors;
368                 
369                 //divide iters between processes
370                 for (int h = 0; h < processors; h++) {
371                     if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
372                     procIters.push_back(numItersPerProcessor);
373                 }
374                 
375                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
376                 
377             }else{ //no need to paralellize if you dont want to rarefy
378                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
379             }
380                                 
381                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
382                 }
383                 
384         
385                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
386
387         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
388
389         
390                 m->mothurOutEndLine();
391                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
392                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
393                 m->mothurOutEndLine();
394
395                 
396                 return 0;
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "PhyloDiversityCommand", "execute");
400                 exit(1);
401         }
402 }
403 //**********************************************************************************************************************
404 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
405         try {
406         int process = 1;
407                 
408                 vector<int> processIDS;
409                 map< string, vector<float> >::iterator itSum;
410
411                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
412                                 
413                 //loop through and create all the processes you want
414                 while (process != processors) {
415                         int pid = fork();
416                         
417                         if (pid > 0) {
418                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
419                                 process++;
420                         }else if (pid == 0){
421                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
422                                 
423                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
424                                 ofstream out;
425                                 m->openOutputFile(outTemp, out);
426                                 
427                                 //output the sumDIversity
428                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
429                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
430                                         for (int k = 0; k < (itSum->second).size(); k++) { 
431                                                 out << (itSum->second)[k] << '\t';
432                                         }
433                                         out << endl;
434                                 }
435                                 
436                                 out.close();
437                                 
438                                 exit(0);
439                         }else { 
440                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
441                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
442                                 exit(0);
443                         }
444                 }
445                 
446                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
447                 
448                 //force parent to wait until all the processes are done
449                 for (int i=0;i<(processors-1);i++) { 
450                         int temp = processIDS[i];
451                         wait(&temp);
452                 }
453                 
454                 //get data created by processes
455                 for (int i=0;i<(processors-1);i++) { 
456                         
457                         //input the sumDIversity
458                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
459                         ifstream in;
460                         m->openInputFile(inTemp, in);
461                                 
462                         //output the sumDIversity
463                         for (int j = 0; j < sumDiv.size(); j++) { 
464                                 string group = "";
465                                 int size = 0;
466                                 
467                                 in >> group >> size; m->gobble(in);
468                                 
469                                 for (int k = 0; k < size; k++) { 
470                                         float tempVal;
471                                         in >> tempVal;
472                                         
473                                         sumDiv[group][k] += tempVal;
474                                 }
475                                 m->gobble(in);
476                         }
477                                 
478                         in.close();
479                         m->mothurRemove(inTemp);
480                 }
481 #else
482         
483         //fill in functions
484         vector<phylodivData*> pDataArray;
485                 DWORD   dwThreadIdArray[processors-1];
486                 HANDLE  hThreadArray[processors-1];
487         vector<CountTable*> cts;
488         vector<Tree*> trees;
489         map<string, int> rootForGroup = getRootForGroups(t);
490                 
491                 //Create processor worker threads.
492                 for( int i=1; i<processors; i++ ){
493             CountTable* copyCount = new CountTable();
494             copyCount->copy(ct);
495             Tree* copyTree = new Tree(copyCount);
496             copyTree->getCopy(t);
497             
498             cts.push_back(copyCount);
499             trees.push_back(copyTree);
500             
501             map<string, vector<float> > copydiv = div;
502             map<string, vector<float> > copysumDiv = sumDiv;
503             vector<int> copyrandomLeaf = randomLeaf;
504             set<int> copynumSampledList = numSampledList;
505             map<string, int> copyRootForGrouping = rootForGroup;
506             
507             phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping);
508                         pDataArray.push_back(temp);
509                         processIDS.push_back(i);
510             
511                         hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
512                 }
513                 
514                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
515                 
516                 //Wait until all threads have terminated.
517                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
518                 
519                 //Close all thread handles and free memory allocations.
520                 for(int i=0; i < pDataArray.size(); i++){
521             for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) {
522                 for (int k = 0; k < (itSum->second).size(); k++) {
523                     sumDiv[itSum->first][k] += (itSum->second)[k];
524                 }
525             }
526                         delete cts[i];
527             delete trees[i];
528                         CloseHandle(hThreadArray[i]);
529                         delete pDataArray[i];
530                 }
531                 
532 #endif
533
534         return 0;               
535         
536         }
537         catch(exception& e) {
538                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
539                 exit(1);
540         }
541 }
542 //**********************************************************************************************************************
543 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
544         try {
545                 int numLeafNodes = randomLeaf.size();
546                 vector<string> mGroups = m->getGroups();
547         
548         map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
549         
550                 for (int l = 0; l < numIters; l++) {
551                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
552          
553                                 //initialize counts
554                                 map<string, int> counts;
555                 vector< map<string, bool> > countedBranch;
556                 for (int i = 0; i < t->getNumNodes(); i++) {
557                     map<string, bool> temp;
558                     for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
559                     countedBranch.push_back(temp);
560                 }
561             
562                                 for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
563                                 
564                                 for(int k = 0; k < numLeafNodes; k++){
565                                                 
566                                         if (m->control_pressed) { return 0; }
567                                         
568                                         //calc branch length of randomLeaf k
569                     vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
570                         
571                                         //for each group in the groups update the total branch length accounting for the names file
572                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
573                                         
574                                         for (int j = 0; j < groups.size(); j++) {
575                         
576                         if (m->inUsersGroups(groups[j], mGroups)) {
577                             int numSeqsInGroupJ = 0;
578                             map<string, int>::iterator it;
579                             it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
580                             if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
581                                 numSeqsInGroupJ = it->second;
582                             }
583                             
584                             if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
585                             
586                             for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
587                                 div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
588                             }
589                             counts[groups[j]] += numSeqsInGroupJ;
590                         }
591                                         }
592                                 }
593                                 
594                                 if (rarefy) {
595                                         //add this diversity to the sum
596                                         for (int j = 0; j < mGroups.size(); j++) {  
597                                                 for (int g = 0; g < div[mGroups[j]].size(); g++) {
598                                                         sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
599                                                 }
600                                         }
601                                 }
602                                 
603                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
604                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
605                         }
606                         
607                         return 0;
608
609         }
610         catch(exception& e) {
611                 m->errorOut(e, "PhyloDiversityCommand", "driver");
612                 exit(1);
613         }
614 }
615
616 //**********************************************************************************************************************
617
618 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
619         try {
620                 
621                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
622                 
623                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
624                 
625                 vector<string> mGroups = m->getGroups();
626                 for (int j = 0; j < mGroups.size(); j++) {
627                         int numSampled = (div[mGroups[j]].size()-1);
628                         out << mGroups[j] << '\t' << numSampled << '\t';
629                 
630                          
631                         float score;
632                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
633                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
634                                 
635                         out << setprecision(4) << score << endl;
636             //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
637                 }
638                                         
639                 out.close();
640                 
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
644                 exit(1);
645         }
646 }
647 //**********************************************************************************************************************
648
649 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
650         try {
651                 
652                 out << "numSampled\t";
653                 vector<string> mGroups = m->getGroups();
654                 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
655                 out << endl;
656                 
657                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
658                 
659                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
660                         int numSampled = *it;
661                         
662                         out << numSampled << '\t';  
663                 
664                         for (int j = 0; j < mGroups.size(); j++) {
665                                 if (numSampled < div[mGroups[j]].size()) { 
666                                         float score;
667                                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
668                                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
669
670                                         out << setprecision(4) << score << '\t';
671                                 }else { out << "NA" << '\t'; }
672                         }
673                         out << endl;
674                 }
675                 
676                 out.close();
677                 
678         }
679         catch(exception& e) {
680                 m->errorOut(e, "PhyloDiversityCommand", "printData");
681                 exit(1);
682         }
683 }
684 //**********************************************************************************************************************
685 //need a vector of floats one branch length for every group the node represents.
686 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
687         try {
688         
689                 //calc the branch length
690                 //while you aren't at root
691                 vector<float> sums; 
692                 int index = leaf;
693                 
694                 vector<string> groups = t->tree[leaf].getGroup();
695                 sums.resize(groups.size(), 0.0);
696                 
697         
698         //you are a leaf
699                 if(t->tree[index].getBranchLength() != -1){     
700                         for (int k = 0; k < groups.size(); k++) { 
701                 sums[k] += abs(t->tree[index].getBranchLength());       
702                         }
703                 }
704         
705         
706         index = t->tree[index].getParent();     
707         
708                 //while you aren't at root
709                 while(t->tree[index].getParent() != -1){
710             
711                         if (m->control_pressed) {  return sums; }
712                         
713                         for (int k = 0; k < groups.size(); k++) {
714                 
715                 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
716                 
717                 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
718                     if (t->tree[index].getBranchLength() != -1) {
719                         sums[k] += abs(t->tree[index].getBranchLength());
720                     }
721                     counted[index][groups[k]] = true;
722                 }
723             }
724             index = t->tree[index].getParent(); 
725         }
726         
727                 return sums;
728         
729         }
730         catch(exception& e) {
731                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
732                 exit(1);
733         }
734 }
735 //**********************************************************************************************************************
736 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
737         try {
738                 map<string, int> roots; //maps group to root for group, may not be root of tree
739                 map<string, bool> done;
740        
741                 //initialize root for all groups to -1
742                 for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
743         
744         for (int i = 0; i < t->getNumLeaves(); i++) {
745             
746             vector<string> groups = t->tree[i].getGroup();
747             
748             int index = t->tree[i].getParent();
749             
750             for (int j = 0; j < groups.size(); j++) {
751                 
752                     if (done[groups[j]] == false) { //we haven't found the root for this group yet, initialize it
753                         done[groups[j]] = true;
754                         roots[groups[j]] = i; //set root to self to start
755                     }
756                     
757                     //while you aren't at root
758                     while(t->tree[index].getParent() != -1){
759                         
760                         if (m->control_pressed) {  return roots; }
761                         
762                         //do both your chidren have have descendants from the users groups? 
763                         int lc = t->tree[index].getLChild();
764                         int rc = t->tree[index].getRChild();
765                         
766                         int LpcountSize = 0;
767                         map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
768                         if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
769                         
770                         int RpcountSize = 0;
771                         itGroup = t->tree[rc].pcount.find(groups[j]);
772                         if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
773                         
774                         if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
775                             if (index > roots[groups[j]]) {  roots[groups[j]] = index; }
776                         }else { ;}
777                         
778                         index = t->tree[index].getParent();     
779                     }
780                 //}
781             }
782         }
783         
784         
785         
786         return roots;
787         
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
791                 exit(1);
792         }
793 }
794 //**********************************************************************************************************************
795
796
797