]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){  
15         try {
16
17                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
25                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
26                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
27                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string PhyloDiversityCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
45                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
46                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
47                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
48                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
49                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
50                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
51                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
54                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
55                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string PhyloDiversityCommand::getOutputFileNameTag(string type, string inputName=""){   
66         try {
67         string outputFileName = "";
68                 map<string, vector<string> >::iterator it;
69         
70         //is this a type this command creates
71         it = outputTypes.find(type);
72         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
73         else {
74             if (type == "phylodiv") {  outputFileName =  "phylodiv"; }
75             else if (type == "rarefy") {  outputFileName =  "phylodiv.rarefaction"; }
76             else if (type == "summary") {  outputFileName =  "phylodiv.summary"; }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "PhyloDiversityCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86
87 //**********************************************************************************************************************
88 PhyloDiversityCommand::PhyloDiversityCommand(){ 
89         try {
90                 abort = true; calledHelp = true; 
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["phylodiv"] = tempOutNames;
94                 outputTypes["rarefy"] = tempOutNames;
95                 outputTypes["summary"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();;
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         map<string,string>::iterator it;
117                         
118                         ValidParameters validParameter;
119                 
120                         //check to make sure all parameters are valid for command
121                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["phylodiv"] = tempOutNames;
128                         outputTypes["rarefy"] = tempOutNames;
129                         outputTypes["summary"] = tempOutNames;
130                         
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("tree");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
142                                 }
143                                 
144                                 it = parameters.find("group");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
150                                 }
151                                 
152                                 it = parameters.find("name");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
158                                 }
159                         }
160                         
161                         //check for required parameters
162                         treefile = validParameter.validFile(parameters, "tree", true);
163                         if (treefile == "not open") { treefile = ""; abort = true; }
164                         else if (treefile == "not found") {                             
165                                 //if there is a current design file, use it
166                                 treefile = m->getTreeFile(); 
167                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
168                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
169                         }else { m->setTreeFile(treefile); }     
170                         
171                         //check for required parameters
172                         groupfile = validParameter.validFile(parameters, "group", true);
173                         if (groupfile == "not open") { groupfile = ""; abort = true; }
174                         else if (groupfile == "not found") { groupfile = ""; }
175                         else { m->setGroupFile(groupfile); }
176                         
177                         namefile = validParameter.validFile(parameters, "name", true);
178                         if (namefile == "not open") { namefile = ""; abort = true; }
179                         else if (namefile == "not found") { namefile = ""; }
180                         else { m->setNameFile(namefile); }
181                         
182                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
183                         
184                         string temp;
185                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
186                         m->mothurConvert(temp, freq); 
187                         
188                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
189                         m->mothurConvert(temp, iters); 
190                         
191                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
192                         rarefy = m->isTrue(temp);
193                         if (!rarefy) { iters = 1;  }
194                         
195                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
196                         summary = m->isTrue(temp);
197                         
198                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
199                         scale = m->isTrue(temp);
200                         
201                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
202                         collect = m->isTrue(temp);
203                         
204                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
205                         m->setProcessors(temp);
206                         m->mothurConvert(temp, processors); 
207                         
208                         groups = validParameter.validFile(parameters, "groups", false);                 
209                         if (groups == "not found") { groups = "";  }
210                         else { 
211                                 m->splitAtDash(groups, Groups);
212                                 m->setGroups(Groups);
213                         }
214                         
215                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
216                         
217                         if (namefile == "") {
218                                 vector<string> files; files.push_back(treefile);
219                                 parser.getNameFile(files);
220                         }
221                 }
222                 
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
226                 exit(1);
227         }                       
228 }
229 //**********************************************************************************************************************
230
231 int PhyloDiversityCommand::execute(){
232         try {
233                 
234                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
235                 
236         int start = time(NULL);
237         
238                 m->setTreeFile(treefile);
239         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
240         vector<Tree*> trees = reader->getTrees();
241         tmap = trees[0]->getTreeMap();
242         delete reader;
243
244                 SharedUtil util;
245                 vector<string> mGroups = m->getGroups();
246                 vector<string> tGroups = tmap->getNamesOfGroups();
247                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
248                 
249                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
250                 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
251                 m->setGroups(mGroups);
252                  
253                 vector<string> outputNames;
254                 
255                 //for each of the users trees
256                 for(int i = 0; i < trees.size(); i++) {
257                 
258                         if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0; }
259                         
260                         ofstream outSum, outRare, outCollect;
261                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("summary");
262                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("rarefy");
263                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("phylodiv");
264                         
265                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
266                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
267                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
268                         
269                         int numLeafNodes = trees[i]->getNumLeaves();
270                         
271                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
272                         vector<int> randomLeaf;
273                         for (int j = 0; j < numLeafNodes; j++) {  
274                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
275                                         randomLeaf.push_back(j); 
276                                 }
277                         }
278                         
279                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
280                         
281                         //each group, each sampling, if no rarefy iters = 1;
282                         map<string, vector<float> > diversity;
283                         
284                         //each group, each sampling, if no rarefy iters = 1;
285                         map<string, vector<float> > sumDiversity;
286                         
287                         //find largest group total 
288                         int largestGroup = 0;
289                         for (int j = 0; j < mGroups.size(); j++) {  
290                                 if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
291                                 
292                                 //initialize diversity
293                                 diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);            //numSampled
294                                                                                                                                                                                                                         //groupA                0.0                     0.0
295                                                                                                                                                                                                                         
296                                 //initialize sumDiversity
297                                 sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
298                         }       
299
300                         //convert freq percentage to number
301                         int increment = 100;
302                         if (freq < 1.0) {  increment = largestGroup * freq;  
303                         }else { increment = freq;  }
304                         
305                         //initialize sampling spots
306                         set<int> numSampledList;
307                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
308                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
309                         
310                         //add other groups ending points
311                         for (int j = 0; j < mGroups.size(); j++) {  
312                                 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
313                         }
314                         
315                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
316                                 if(processors == 1){
317                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
318                                 }else{
319                                         if (rarefy) {
320                                                 vector<int> procIters;
321                                                 
322                                                 int numItersPerProcessor = iters / processors;
323                                                 
324                                                 //divide iters between processes
325                                                 for (int h = 0; h < processors; h++) {
326                                                         if(h == processors - 1){
327                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
328                                                         }
329                                                         procIters.push_back(numItersPerProcessor);
330                                                 }
331                                                 
332                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
333                                                 
334                                         }else{ //no need to paralellize if you dont want to rarefy
335                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
336                                         }
337                                 }
338
339                         #else
340                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
341                         #endif
342
343                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
344                 }
345                 
346         
347                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
348
349         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
350
351         
352                 m->mothurOutEndLine();
353                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
354                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
355                 m->mothurOutEndLine();
356
357                 
358                 return 0;
359         }
360         catch(exception& e) {
361                 m->errorOut(e, "PhyloDiversityCommand", "execute");
362                 exit(1);
363         }
364 }
365 //**********************************************************************************************************************
366 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
367         try {
368                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
369                 int process = 1;
370                 
371                 vector<int> processIDS;
372                 map< string, vector<float> >::iterator itSum;
373                 
374                 //loop through and create all the processes you want
375                 while (process != processors) {
376                         int pid = fork();
377                         
378                         if (pid > 0) {
379                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
380                                 process++;
381                         }else if (pid == 0){
382                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
383                                 
384                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
385                                 ofstream out;
386                                 m->openOutputFile(outTemp, out);
387                                 
388                                 //output the sumDIversity
389                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
390                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
391                                         for (int k = 0; k < (itSum->second).size(); k++) { 
392                                                 out << (itSum->second)[k] << '\t';
393                                         }
394                                         out << endl;
395                                 }
396                                 
397                                 out.close();
398                                 
399                                 exit(0);
400                         }else { 
401                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
402                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
403                                 exit(0);
404                         }
405                 }
406                 
407                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
408                 
409                 //force parent to wait until all the processes are done
410                 for (int i=0;i<(processors-1);i++) { 
411                         int temp = processIDS[i];
412                         wait(&temp);
413                 }
414                 
415                 //get data created by processes
416                 for (int i=0;i<(processors-1);i++) { 
417                         
418                         //input the sumDIversity
419                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
420                         ifstream in;
421                         m->openInputFile(inTemp, in);
422                                 
423                         //output the sumDIversity
424                         for (int j = 0; j < sumDiv.size(); j++) { 
425                                 string group = "";
426                                 int size = 0;
427                                 
428                                 in >> group >> size; m->gobble(in);
429                                 
430                                 for (int k = 0; k < size; k++) { 
431                                         float tempVal;
432                                         in >> tempVal;
433                                         
434                                         sumDiv[group][k] += tempVal;
435                                 }
436                                 m->gobble(in);
437                         }
438                                 
439                         in.close();
440                         m->mothurRemove(inTemp);
441                 }
442                 
443 #endif
444
445         return 0;               
446         
447         }
448         catch(exception& e) {
449                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
450                 exit(1);
451         }
452 }
453 //**********************************************************************************************************************
454 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
455         try {
456                 int numLeafNodes = randomLeaf.size();
457                 vector<string> mGroups = m->getGroups();
458         
459         map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
460         
461                 for (int l = 0; l < numIters; l++) {
462                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
463             cout << l << endl;
464                                 //initialize counts
465                                 map<string, int> counts;
466                 vector< map<string, bool> > countedBranch;
467                 for (int i = 0; i < t->getNumNodes(); i++) {
468                     map<string, bool> temp;
469                     for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
470                     countedBranch.push_back(temp);
471                 }
472             
473                                 for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
474                                 
475                                 for(int k = 0; k < numLeafNodes; k++){
476                                                 
477                                         if (m->control_pressed) { return 0; }
478                                         
479                                         //calc branch length of randomLeaf k
480                     vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
481                         
482                                         //for each group in the groups update the total branch length accounting for the names file
483                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
484                                         
485                                         for (int j = 0; j < groups.size(); j++) {
486                         
487                         if (m->inUsersGroups(groups[j], mGroups)) {
488                             int numSeqsInGroupJ = 0;
489                             map<string, int>::iterator it;
490                             it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
491                             if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
492                                 numSeqsInGroupJ = it->second;
493                             }
494                             
495                             if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
496                             
497                             for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
498                                 div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
499                             }
500                             counts[groups[j]] += numSeqsInGroupJ;
501                         }
502                                         }
503                                 }
504                                 
505                                 if (rarefy) {
506                                         //add this diversity to the sum
507                                         for (int j = 0; j < mGroups.size(); j++) {  
508                                                 for (int g = 0; g < div[mGroups[j]].size(); g++) {
509                                                         sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
510                                                 }
511                                         }
512                                 }
513                                 
514                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
515                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
516                         }
517                         
518                         return 0;
519
520         }
521         catch(exception& e) {
522                 m->errorOut(e, "PhyloDiversityCommand", "driver");
523                 exit(1);
524         }
525 }
526
527 //**********************************************************************************************************************
528
529 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
530         try {
531                 
532                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
533                 
534                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
535                 
536                 vector<string> mGroups = m->getGroups();
537                 for (int j = 0; j < mGroups.size(); j++) {
538                         int numSampled = (div[mGroups[j]].size()-1);
539                         out << mGroups[j] << '\t' << numSampled << '\t';
540                 
541                          
542                         float score;
543                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
544                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
545                                 
546                         out << setprecision(4) << score << endl;
547                 }
548                                         
549                 out.close();
550                 
551         }
552         catch(exception& e) {
553                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
554                 exit(1);
555         }
556 }
557 //**********************************************************************************************************************
558
559 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
560         try {
561                 
562                 out << "numSampled\t";
563                 vector<string> mGroups = m->getGroups();
564                 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
565                 out << endl;
566                 
567                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
568                 
569                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
570                         int numSampled = *it;
571                         
572                         out << numSampled << '\t';  
573                 
574                         for (int j = 0; j < mGroups.size(); j++) {
575                                 if (numSampled < div[mGroups[j]].size()) { 
576                                         float score;
577                                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
578                                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
579
580                                         out << setprecision(4) << score << '\t';
581                                 }else { out << "NA" << '\t'; }
582                         }
583                         out << endl;
584                 }
585                 
586                 out.close();
587                 
588         }
589         catch(exception& e) {
590                 m->errorOut(e, "PhyloDiversityCommand", "printData");
591                 exit(1);
592         }
593 }
594 //**********************************************************************************************************************
595 //need a vector of floats one branch length for every group the node represents.
596 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
597         try {
598         
599                 //calc the branch length
600                 //while you aren't at root
601                 vector<float> sums; 
602                 int index = leaf;
603                 
604                 vector<string> groups = t->tree[leaf].getGroup();
605                 sums.resize(groups.size(), 0.0);
606                 
607         
608         //you are a leaf
609                 if(t->tree[index].getBranchLength() != -1){     
610                         for (int k = 0; k < groups.size(); k++) { 
611                                 sums[k] += abs(t->tree[index].getBranchLength());       
612                         }
613                 }
614         
615         
616         index = t->tree[index].getParent();     
617         
618                 //while you aren't at root
619                 while(t->tree[index].getParent() != -1){
620             
621                         if (m->control_pressed) {  return sums; }
622                         
623                         for (int k = 0; k < groups.size(); k++) {
624                 
625                 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
626                 
627                 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
628                     if (t->tree[index].getBranchLength() != -1) {
629                         sums[k] += abs(t->tree[index].getBranchLength());
630                     }
631                     counted[index][groups[k]] = true;
632                 }
633             }
634             index = t->tree[index].getParent(); 
635         }
636         
637                 return sums;
638         
639         }
640         catch(exception& e) {
641                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
642                 exit(1);
643         }
644 }
645 //**********************************************************************************************************************
646 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
647         try {
648                 map<string, int> roots; //maps group to root for group, may not be root of tree
649                 map<string, bool> done;
650        
651                 //initialize root for all groups to -1
652                 for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
653         
654         for (int i = 0; i < t->getNumLeaves(); i++) {
655             
656             vector<string> groups = t->tree[i].getGroup();
657             
658             int index = t->tree[i].getParent();
659             
660             for (int j = 0; j < groups.size(); j++) {
661                 
662                 if (done[groups[j]] == false) { //we haven't found the root for this group yet
663                     
664                     done[groups[j]] = true;
665                     roots[groups[j]] = i; //set root to self to start
666                     
667                     //while you aren't at root
668                     while(t->tree[index].getParent() != -1){
669                         
670                         if (m->control_pressed) {  return roots; }
671                         
672                         //do both your chidren have have descendants from the users groups? 
673                         int lc = t->tree[index].getLChild();
674                         int rc = t->tree[index].getRChild();
675                         
676                         int LpcountSize = 0;
677                         map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
678                         if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
679                         
680                         int RpcountSize = 0;
681                         itGroup = t->tree[rc].pcount.find(groups[j]);
682                         if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
683                         
684                         if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
685                             roots[groups[j]] = index;
686                         }else { ;}
687                         
688                         index = t->tree[index].getParent();     
689                     }
690                 }
691             }
692         }
693         
694         return roots;
695         
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
699                 exit(1);
700         }
701 }
702 //**********************************************************************************************************************
703
704
705