]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){  
15         try {
16
17                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
26                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
27                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
28                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string PhyloDiversityCommand::getHelpString(){  
43         try {
44                 string helpString = "";
45                 helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
47                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
48                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
49                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
50                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
51                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
52                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
53                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
54                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
55                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
56                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65 //**********************************************************************************************************************
66 string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){   
67         try {
68         string outputFileName = "";
69                 map<string, vector<string> >::iterator it;
70         
71         //is this a type this command creates
72         it = outputTypes.find(type);
73         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
74         else {
75             if (type == "phylodiv") {  outputFileName =  "phylodiv"; }
76             else if (type == "rarefy") {  outputFileName =  "phylodiv.rarefaction"; }
77             else if (type == "summary") {  outputFileName =  "phylodiv.summary"; }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87
88 //**********************************************************************************************************************
89 PhyloDiversityCommand::PhyloDiversityCommand(){ 
90         try {
91                 abort = true; calledHelp = true; 
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["phylodiv"] = tempOutNames;
95                 outputTypes["rarefy"] = tempOutNames;
96                 outputTypes["summary"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();;
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         map<string,string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                 
121                         //check to make sure all parameters are valid for command
122                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["phylodiv"] = tempOutNames;
129                         outputTypes["rarefy"] = tempOutNames;
130                         outputTypes["summary"] = tempOutNames;
131                         
132                         //if the user changes the input directory command factory will send this info to us in the output parameter 
133                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
134                         if (inputDir == "not found"){   inputDir = "";          }
135                         else {
136                                 string path;
137                                 it = parameters.find("tree");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
143                                 }
144                                 
145                                 it = parameters.find("group");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
151                                 }
152                                 
153                                 it = parameters.find("name");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
159                                 }
160                 
161                 it = parameters.find("count");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
167                                 }
168                         }
169                         
170                         //check for required parameters
171                         treefile = validParameter.validFile(parameters, "tree", true);
172                         if (treefile == "not open") { treefile = ""; abort = true; }
173                         else if (treefile == "not found") {                             
174                                 //if there is a current design file, use it
175                                 treefile = m->getTreeFile(); 
176                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
177                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
178                         }else { m->setTreeFile(treefile); }     
179                         
180                         //check for required parameters
181                         groupfile = validParameter.validFile(parameters, "group", true);
182                         if (groupfile == "not open") { groupfile = ""; abort = true; }
183                         else if (groupfile == "not found") { groupfile = ""; }
184                         else { m->setGroupFile(groupfile); }
185                         
186                         namefile = validParameter.validFile(parameters, "name", true);
187                         if (namefile == "not open") { namefile = ""; abort = true; }
188                         else if (namefile == "not found") { namefile = ""; }
189                         else { m->setNameFile(namefile); }
190                         
191             countfile = validParameter.validFile(parameters, "count", true);
192                         if (countfile == "not open") { countfile = ""; abort = true; }
193                         else if (countfile == "not found") { countfile = "";  } 
194                         else { m->setCountTableFile(countfile); }
195             
196             if ((namefile != "") && (countfile != "")) {
197                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
198             }
199                         
200             if ((groupfile != "") && (countfile != "")) {
201                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
202             }
203
204                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
205                         
206                         string temp;
207                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
208                         m->mothurConvert(temp, freq); 
209                         
210                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
211                         m->mothurConvert(temp, iters); 
212                         
213                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
214                         rarefy = m->isTrue(temp);
215                         if (!rarefy) { iters = 1;  }
216                         
217                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
218                         summary = m->isTrue(temp);
219                         
220                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
221                         scale = m->isTrue(temp);
222                         
223                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
224                         collect = m->isTrue(temp);
225                         
226                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
227                         m->setProcessors(temp);
228                         m->mothurConvert(temp, processors); 
229                         
230                         groups = validParameter.validFile(parameters, "groups", false);                 
231                         if (groups == "not found") { groups = "";  }
232                         else { 
233                                 m->splitAtDash(groups, Groups);
234                                 m->setGroups(Groups);
235                         }
236                         
237                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
238                         
239                         if (countfile=="") {
240                 if (namefile == "") {
241                     vector<string> files; files.push_back(treefile);
242                     parser.getNameFile(files);
243                 } 
244             }
245                 }
246                 
247         }
248         catch(exception& e) {
249                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
250                 exit(1);
251         }                       
252 }
253 //**********************************************************************************************************************
254
255 int PhyloDiversityCommand::execute(){
256         try {
257                 
258                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
259                 
260         int start = time(NULL);
261         
262                 m->setTreeFile(treefile);
263         TreeReader* reader;
264         if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
265         else { reader = new TreeReader(treefile, countfile); }
266         vector<Tree*> trees = reader->getTrees();
267         ct = trees[0]->getCountTable();
268         delete reader;
269
270                 SharedUtil util;
271                 vector<string> mGroups = m->getGroups();
272                 vector<string> tGroups = ct->getNamesOfGroups();
273                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
274                 
275                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
276                 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
277                 m->setGroups(mGroups);
278                  
279                 vector<string> outputNames;
280                 
281                 //for each of the users trees
282                 for(int i = 0; i < trees.size(); i++) {
283                 
284                         if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
285                         
286                         ofstream outSum, outRare, outCollect;
287                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("summary");
288                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("rarefy");
289                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("phylodiv");
290                         
291                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
292                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
293                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
294                         
295                         int numLeafNodes = trees[i]->getNumLeaves();
296                         
297                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
298                         vector<int> randomLeaf;
299                         for (int j = 0; j < numLeafNodes; j++) {  
300                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
301                                         randomLeaf.push_back(j); 
302                                 }
303                         }
304                         
305                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
306                         
307                         //each group, each sampling, if no rarefy iters = 1;
308                         map<string, vector<float> > diversity;
309                         
310                         //each group, each sampling, if no rarefy iters = 1;
311                         map<string, vector<float> > sumDiversity;
312                         
313                         //find largest group total 
314                         int largestGroup = 0;
315                         for (int j = 0; j < mGroups.size(); j++) { 
316                 int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
317                                 if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
318                                 
319                                 //initialize diversity
320                                 diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);          //numSampled
321                                                                                                                                                                                                                         //groupA                0.0                     0.0
322                                                                                                                                                                                                                         
323                                 //initialize sumDiversity
324                                 sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
325                         }       
326
327                         //convert freq percentage to number
328                         int increment = 100;
329                         if (freq < 1.0) {  increment = largestGroup * freq;  
330                         }else { increment = freq;  }
331                         
332                         //initialize sampling spots
333                         set<int> numSampledList;
334                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
335                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
336                         
337                         //add other groups ending points
338                         for (int j = 0; j < mGroups.size(); j++) {  
339                                 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
340                         }
341                         
342                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
343                                 if(processors == 1){
344                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
345                                 }else{
346                                         if (rarefy) {
347                                                 vector<int> procIters;
348                                                 
349                                                 int numItersPerProcessor = iters / processors;
350                                                 
351                                                 //divide iters between processes
352                                                 for (int h = 0; h < processors; h++) {
353                                                         if(h == processors - 1){
354                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
355                                                         }
356                                                         procIters.push_back(numItersPerProcessor);
357                                                 }
358                                                 
359                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
360                                                 
361                                         }else{ //no need to paralellize if you dont want to rarefy
362                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
363                                         }
364                                 }
365
366                         #else
367                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
368                         #endif
369
370                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
371                 }
372                 
373         
374                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
375
376         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run phylo.diversity."); m->mothurOutEndLine();
377
378         
379                 m->mothurOutEndLine();
380                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
381                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
382                 m->mothurOutEndLine();
383
384                 
385                 return 0;
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "PhyloDiversityCommand", "execute");
389                 exit(1);
390         }
391 }
392 //**********************************************************************************************************************
393 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
394         try {
395                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
396                 int process = 1;
397                 
398                 vector<int> processIDS;
399                 map< string, vector<float> >::iterator itSum;
400                 
401                 //loop through and create all the processes you want
402                 while (process != processors) {
403                         int pid = fork();
404                         
405                         if (pid > 0) {
406                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
407                                 process++;
408                         }else if (pid == 0){
409                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
410                                 
411                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
412                                 ofstream out;
413                                 m->openOutputFile(outTemp, out);
414                                 
415                                 //output the sumDIversity
416                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
417                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
418                                         for (int k = 0; k < (itSum->second).size(); k++) { 
419                                                 out << (itSum->second)[k] << '\t';
420                                         }
421                                         out << endl;
422                                 }
423                                 
424                                 out.close();
425                                 
426                                 exit(0);
427                         }else { 
428                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
429                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
430                                 exit(0);
431                         }
432                 }
433                 
434                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
435                 
436                 //force parent to wait until all the processes are done
437                 for (int i=0;i<(processors-1);i++) { 
438                         int temp = processIDS[i];
439                         wait(&temp);
440                 }
441                 
442                 //get data created by processes
443                 for (int i=0;i<(processors-1);i++) { 
444                         
445                         //input the sumDIversity
446                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
447                         ifstream in;
448                         m->openInputFile(inTemp, in);
449                                 
450                         //output the sumDIversity
451                         for (int j = 0; j < sumDiv.size(); j++) { 
452                                 string group = "";
453                                 int size = 0;
454                                 
455                                 in >> group >> size; m->gobble(in);
456                                 
457                                 for (int k = 0; k < size; k++) { 
458                                         float tempVal;
459                                         in >> tempVal;
460                                         
461                                         sumDiv[group][k] += tempVal;
462                                 }
463                                 m->gobble(in);
464                         }
465                                 
466                         in.close();
467                         m->mothurRemove(inTemp);
468                 }
469                 
470 #endif
471
472         return 0;               
473         
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
477                 exit(1);
478         }
479 }
480 //**********************************************************************************************************************
481 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
482         try {
483                 int numLeafNodes = randomLeaf.size();
484                 vector<string> mGroups = m->getGroups();
485         
486         map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
487         
488                 for (int l = 0; l < numIters; l++) {
489                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
490          
491                                 //initialize counts
492                                 map<string, int> counts;
493                 vector< map<string, bool> > countedBranch;
494                 for (int i = 0; i < t->getNumNodes(); i++) {
495                     map<string, bool> temp;
496                     for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
497                     countedBranch.push_back(temp);
498                 }
499             
500                                 for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
501                                 
502                                 for(int k = 0; k < numLeafNodes; k++){
503                                                 
504                                         if (m->control_pressed) { return 0; }
505                                         
506                                         //calc branch length of randomLeaf k
507                     vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
508                         
509                                         //for each group in the groups update the total branch length accounting for the names file
510                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
511                                         
512                                         for (int j = 0; j < groups.size(); j++) {
513                         
514                         if (m->inUsersGroups(groups[j], mGroups)) {
515                             int numSeqsInGroupJ = 0;
516                             map<string, int>::iterator it;
517                             it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
518                             if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
519                                 numSeqsInGroupJ = it->second;
520                             }
521                             
522                             if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
523                             
524                             for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
525                                 div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
526                             }
527                             counts[groups[j]] += numSeqsInGroupJ;
528                         }
529                                         }
530                                 }
531                                 
532                                 if (rarefy) {
533                                         //add this diversity to the sum
534                                         for (int j = 0; j < mGroups.size(); j++) {  
535                                                 for (int g = 0; g < div[mGroups[j]].size(); g++) {
536                                                         sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
537                                                 }
538                                         }
539                                 }
540                                 
541                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
542                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
543                         }
544                         
545                         return 0;
546
547         }
548         catch(exception& e) {
549                 m->errorOut(e, "PhyloDiversityCommand", "driver");
550                 exit(1);
551         }
552 }
553
554 //**********************************************************************************************************************
555
556 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
557         try {
558                 
559                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
560                 
561                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
562                 
563                 vector<string> mGroups = m->getGroups();
564                 for (int j = 0; j < mGroups.size(); j++) {
565                         int numSampled = (div[mGroups[j]].size()-1);
566                         out << mGroups[j] << '\t' << numSampled << '\t';
567                 
568                          
569                         float score;
570                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
571                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
572                                 
573                         out << setprecision(4) << score << endl;
574                 }
575                                         
576                 out.close();
577                 
578         }
579         catch(exception& e) {
580                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
581                 exit(1);
582         }
583 }
584 //**********************************************************************************************************************
585
586 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
587         try {
588                 
589                 out << "numSampled\t";
590                 vector<string> mGroups = m->getGroups();
591                 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
592                 out << endl;
593                 
594                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
595                 
596                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
597                         int numSampled = *it;
598                         
599                         out << numSampled << '\t';  
600                 
601                         for (int j = 0; j < mGroups.size(); j++) {
602                                 if (numSampled < div[mGroups[j]].size()) { 
603                                         float score;
604                                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
605                                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
606
607                                         out << setprecision(4) << score << '\t';
608                                 }else { out << "NA" << '\t'; }
609                         }
610                         out << endl;
611                 }
612                 
613                 out.close();
614                 
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "PhyloDiversityCommand", "printData");
618                 exit(1);
619         }
620 }
621 //**********************************************************************************************************************
622 //need a vector of floats one branch length for every group the node represents.
623 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
624         try {
625         
626                 //calc the branch length
627                 //while you aren't at root
628                 vector<float> sums; 
629                 int index = leaf;
630                 
631                 vector<string> groups = t->tree[leaf].getGroup();
632                 sums.resize(groups.size(), 0.0);
633                 
634         
635         //you are a leaf
636                 if(t->tree[index].getBranchLength() != -1){     
637                         for (int k = 0; k < groups.size(); k++) { 
638                                 sums[k] += abs(t->tree[index].getBranchLength());       
639                         }
640                 }
641         
642         
643         index = t->tree[index].getParent();     
644         
645                 //while you aren't at root
646                 while(t->tree[index].getParent() != -1){
647             
648                         if (m->control_pressed) {  return sums; }
649                         
650                         for (int k = 0; k < groups.size(); k++) {
651                 
652                 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
653                 
654                 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
655                     if (t->tree[index].getBranchLength() != -1) {
656                         sums[k] += abs(t->tree[index].getBranchLength());
657                     }
658                     counted[index][groups[k]] = true;
659                 }
660             }
661             index = t->tree[index].getParent(); 
662         }
663         
664                 return sums;
665         
666         }
667         catch(exception& e) {
668                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
669                 exit(1);
670         }
671 }
672 //**********************************************************************************************************************
673 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
674         try {
675                 map<string, int> roots; //maps group to root for group, may not be root of tree
676                 map<string, bool> done;
677        
678                 //initialize root for all groups to -1
679                 for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
680         
681         for (int i = 0; i < t->getNumLeaves(); i++) {
682             
683             vector<string> groups = t->tree[i].getGroup();
684             
685             int index = t->tree[i].getParent();
686             
687             for (int j = 0; j < groups.size(); j++) {
688                 
689                 if (done[groups[j]] == false) { //we haven't found the root for this group yet
690                     
691                     done[groups[j]] = true;
692                     roots[groups[j]] = i; //set root to self to start
693                     
694                     //while you aren't at root
695                     while(t->tree[index].getParent() != -1){
696                         
697                         if (m->control_pressed) {  return roots; }
698                         
699                         //do both your chidren have have descendants from the users groups? 
700                         int lc = t->tree[index].getLChild();
701                         int rc = t->tree[index].getRChild();
702                         
703                         int LpcountSize = 0;
704                         map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
705                         if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
706                         
707                         int RpcountSize = 0;
708                         itGroup = t->tree[rc].pcount.find(groups[j]);
709                         if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
710                         
711                         if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
712                             roots[groups[j]] = index;
713                         }else { ;}
714                         
715                         index = t->tree[index].getParent();     
716                     }
717                 }
718             }
719         }
720         
721         return roots;
722         
723         }
724         catch(exception& e) {
725                 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
726                 exit(1);
727         }
728 }
729 //**********************************************************************************************************************
730
731
732