]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
added some debug stuff to chimera.uchime. optimized phylo.diversity to improve raref...
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){  
15         try {
16
17                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
25                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
26                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
27                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string PhyloDiversityCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
45                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
46                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
47                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
48                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
49                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
50                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
51                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
54                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
55                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64
65
66 //**********************************************************************************************************************
67 PhyloDiversityCommand::PhyloDiversityCommand(){ 
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["phylodiv"] = tempOutNames;
73                 outputTypes["rarefy"] = tempOutNames;
74                 outputTypes["summary"] = tempOutNames;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
83         try {
84                 abort = false; calledHelp = false;   
85                 
86                 //allow user to run help
87                 if(option == "help") { help(); abort = true; calledHelp = true; }
88                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89                 
90                 else {
91                         vector<string> myArray = setParameters();;
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         map<string,string>::iterator it;
96                         
97                         ValidParameters validParameter;
98                 
99                         //check to make sure all parameters are valid for command
100                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["phylodiv"] = tempOutNames;
107                         outputTypes["rarefy"] = tempOutNames;
108                         outputTypes["summary"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("tree");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("group");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
129                                 }
130                                 
131                                 it = parameters.find("name");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
137                                 }
138                         }
139                         
140                         //check for required parameters
141                         treefile = validParameter.validFile(parameters, "tree", true);
142                         if (treefile == "not open") { treefile = ""; abort = true; }
143                         else if (treefile == "not found") {                             
144                                 //if there is a current design file, use it
145                                 treefile = m->getTreeFile(); 
146                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
148                         }else { m->setTreeFile(treefile); }     
149                         
150                         //check for required parameters
151                         groupfile = validParameter.validFile(parameters, "group", true);
152                         if (groupfile == "not open") { groupfile = ""; abort = true; }
153                         else if (groupfile == "not found") { groupfile = ""; }
154                         else { m->setGroupFile(groupfile); }
155                         
156                         namefile = validParameter.validFile(parameters, "name", true);
157                         if (namefile == "not open") { namefile = ""; abort = true; }
158                         else if (namefile == "not found") { namefile = ""; }
159                         else { m->setNameFile(namefile); }
160                         
161                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
162                         
163                         string temp;
164                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
165                         m->mothurConvert(temp, freq); 
166                         
167                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
168                         m->mothurConvert(temp, iters); 
169                         
170                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
171                         rarefy = m->isTrue(temp);
172                         if (!rarefy) { iters = 1;  }
173                         
174                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
175                         summary = m->isTrue(temp);
176                         
177                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
178                         scale = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
181                         collect = m->isTrue(temp);
182                         
183                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
184                         m->setProcessors(temp);
185                         m->mothurConvert(temp, processors); 
186                         
187                         groups = validParameter.validFile(parameters, "groups", false);                 
188                         if (groups == "not found") { groups = "";  }
189                         else { 
190                                 m->splitAtDash(groups, Groups);
191                                 m->setGroups(Groups);
192                         }
193                         
194                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
195                         
196                         if (namefile == "") {
197                                 vector<string> files; files.push_back(treefile);
198                                 parser.getNameFile(files);
199                         }
200                 }
201                 
202         }
203         catch(exception& e) {
204                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
205                 exit(1);
206         }                       
207 }
208 //**********************************************************************************************************************
209
210 int PhyloDiversityCommand::execute(){
211         try {
212                 
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214                 
215         int start = time(NULL);
216         
217                 m->setTreeFile(treefile);
218         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
219         vector<Tree*> trees = reader->getTrees();
220         tmap = trees[0]->getTreeMap();
221         delete reader;
222
223                 SharedUtil util;
224                 vector<string> mGroups = m->getGroups();
225                 vector<string> tGroups = tmap->getNamesOfGroups();
226                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
227                 
228                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
229                 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
230                 m->setGroups(mGroups);
231                  
232                 vector<string> outputNames;
233                 
234                 //for each of the users trees
235                 for(int i = 0; i < trees.size(); i++) {
236                 
237                         if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0; }
238                         
239                         ofstream outSum, outRare, outCollect;
240                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.summary";
241                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.rarefaction";
242                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv";
243                         
244                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
245                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
246                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
247                         
248                         int numLeafNodes = trees[i]->getNumLeaves();
249                         
250                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
251                         vector<int> randomLeaf;
252                         for (int j = 0; j < numLeafNodes; j++) {  
253                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
254                                         randomLeaf.push_back(j); 
255                                 }
256                         }
257                         
258                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
259                         
260                         //each group, each sampling, if no rarefy iters = 1;
261                         map<string, vector<float> > diversity;
262                         
263                         //each group, each sampling, if no rarefy iters = 1;
264                         map<string, vector<float> > sumDiversity;
265                         
266                         //find largest group total 
267                         int largestGroup = 0;
268                         for (int j = 0; j < mGroups.size(); j++) {  
269                                 if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
270                                 
271                                 //initialize diversity
272                                 diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);            //numSampled
273                                                                                                                                                                                                                         //groupA                0.0                     0.0
274                                                                                                                                                                                                                         
275                                 //initialize sumDiversity
276                                 sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
277                         }       
278
279                         //convert freq percentage to number
280                         int increment = 100;
281                         if (freq < 1.0) {  increment = largestGroup * freq;  
282                         }else { increment = freq;  }
283                         
284                         //initialize sampling spots
285                         set<int> numSampledList;
286                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
287                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
288                         
289                         //add other groups ending points
290                         for (int j = 0; j < mGroups.size(); j++) {  
291                                 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
292                         }
293                         
294                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
295                                 if(processors == 1){
296                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
297                                 }else{
298                                         if (rarefy) {
299                                                 vector<int> procIters;
300                                                 
301                                                 int numItersPerProcessor = iters / processors;
302                                                 
303                                                 //divide iters between processes
304                                                 for (int h = 0; h < processors; h++) {
305                                                         if(h == processors - 1){
306                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
307                                                         }
308                                                         procIters.push_back(numItersPerProcessor);
309                                                 }
310                                                 
311                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
312                                                 
313                                         }else{ //no need to paralellize if you dont want to rarefy
314                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
315                                         }
316                                 }
317
318                         #else
319                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
320                         #endif
321
322                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
323                 }
324                 
325         
326                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
327
328         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
329
330         
331                 m->mothurOutEndLine();
332                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
333                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
334                 m->mothurOutEndLine();
335
336                 
337                 return 0;
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "PhyloDiversityCommand", "execute");
341                 exit(1);
342         }
343 }
344 //**********************************************************************************************************************
345 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
346         try {
347                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
348                 int process = 1;
349                 
350                 vector<int> processIDS;
351                 map< string, vector<float> >::iterator itSum;
352                 
353                 //loop through and create all the processes you want
354                 while (process != processors) {
355                         int pid = fork();
356                         
357                         if (pid > 0) {
358                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
359                                 process++;
360                         }else if (pid == 0){
361                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
362                                 
363                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
364                                 ofstream out;
365                                 m->openOutputFile(outTemp, out);
366                                 
367                                 //output the sumDIversity
368                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
369                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
370                                         for (int k = 0; k < (itSum->second).size(); k++) { 
371                                                 out << (itSum->second)[k] << '\t';
372                                         }
373                                         out << endl;
374                                 }
375                                 
376                                 out.close();
377                                 
378                                 exit(0);
379                         }else { 
380                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
381                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
382                                 exit(0);
383                         }
384                 }
385                 
386                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
387                 
388                 //force parent to wait until all the processes are done
389                 for (int i=0;i<(processors-1);i++) { 
390                         int temp = processIDS[i];
391                         wait(&temp);
392                 }
393                 
394                 //get data created by processes
395                 for (int i=0;i<(processors-1);i++) { 
396                         
397                         //input the sumDIversity
398                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
399                         ifstream in;
400                         m->openInputFile(inTemp, in);
401                                 
402                         //output the sumDIversity
403                         for (int j = 0; j < sumDiv.size(); j++) { 
404                                 string group = "";
405                                 int size = 0;
406                                 
407                                 in >> group >> size; m->gobble(in);
408                                 
409                                 for (int k = 0; k < size; k++) { 
410                                         float tempVal;
411                                         in >> tempVal;
412                                         
413                                         sumDiv[group][k] += tempVal;
414                                 }
415                                 m->gobble(in);
416                         }
417                                 
418                         in.close();
419                         m->mothurRemove(inTemp);
420                 }
421                 
422 #endif
423
424         return 0;               
425         
426         }
427         catch(exception& e) {
428                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
429                 exit(1);
430         }
431 }
432 //**********************************************************************************************************************
433 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
434         try {
435                 int numLeafNodes = randomLeaf.size();
436                 vector<string> mGroups = m->getGroups();
437         
438         map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
439         
440                 for (int l = 0; l < numIters; l++) {
441                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
442             cout << l << endl;
443                                 //initialize counts
444                                 map<string, int> counts;
445                 vector< map<string, bool> > countedBranch;
446                 for (int i = 0; i < t->getNumNodes(); i++) {
447                     map<string, bool> temp;
448                     for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
449                     countedBranch.push_back(temp);
450                 }
451             
452                                 for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
453                                 
454                                 for(int k = 0; k < numLeafNodes; k++){
455                                                 
456                                         if (m->control_pressed) { return 0; }
457                                         
458                                         //calc branch length of randomLeaf k
459                     vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
460                         
461                                         //for each group in the groups update the total branch length accounting for the names file
462                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
463                                         
464                                         for (int j = 0; j < groups.size(); j++) {
465                         
466                         if (m->inUsersGroups(groups[j], mGroups)) {
467                             int numSeqsInGroupJ = 0;
468                             map<string, int>::iterator it;
469                             it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
470                             if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
471                                 numSeqsInGroupJ = it->second;
472                             }
473                             
474                             if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
475                             
476                             for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
477                                 div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
478                             }
479                             counts[groups[j]] += numSeqsInGroupJ;
480                         }
481                                         }
482                                 }
483                                 
484                                 if (rarefy) {
485                                         //add this diversity to the sum
486                                         for (int j = 0; j < mGroups.size(); j++) {  
487                                                 for (int g = 0; g < div[mGroups[j]].size(); g++) {
488                                                         sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
489                                                 }
490                                         }
491                                 }
492                                 
493                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
494                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
495                         }
496                         
497                         return 0;
498
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "PhyloDiversityCommand", "driver");
502                 exit(1);
503         }
504 }
505
506 //**********************************************************************************************************************
507
508 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
509         try {
510                 
511                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
512                 
513                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
514                 
515                 vector<string> mGroups = m->getGroups();
516                 for (int j = 0; j < mGroups.size(); j++) {
517                         int numSampled = (div[mGroups[j]].size()-1);
518                         out << mGroups[j] << '\t' << numSampled << '\t';
519                 
520                          
521                         float score;
522                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
523                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
524                                 
525                         out << setprecision(4) << score << endl;
526                 }
527                                         
528                 out.close();
529                 
530         }
531         catch(exception& e) {
532                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
533                 exit(1);
534         }
535 }
536 //**********************************************************************************************************************
537
538 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
539         try {
540                 
541                 out << "numSampled\t";
542                 vector<string> mGroups = m->getGroups();
543                 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
544                 out << endl;
545                 
546                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
547                 
548                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
549                         int numSampled = *it;
550                         
551                         out << numSampled << '\t';  
552                 
553                         for (int j = 0; j < mGroups.size(); j++) {
554                                 if (numSampled < div[mGroups[j]].size()) { 
555                                         float score;
556                                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
557                                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
558
559                                         out << setprecision(4) << score << '\t';
560                                 }else { out << "NA" << '\t'; }
561                         }
562                         out << endl;
563                 }
564                 
565                 out.close();
566                 
567         }
568         catch(exception& e) {
569                 m->errorOut(e, "PhyloDiversityCommand", "printData");
570                 exit(1);
571         }
572 }
573 //**********************************************************************************************************************
574 //need a vector of floats one branch length for every group the node represents.
575 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
576         try {
577         
578                 //calc the branch length
579                 //while you aren't at root
580                 vector<float> sums; 
581                 int index = leaf;
582                 
583                 vector<string> groups = t->tree[leaf].getGroup();
584                 sums.resize(groups.size(), 0.0);
585                 
586         
587         //you are a leaf
588                 if(t->tree[index].getBranchLength() != -1){     
589                         for (int k = 0; k < groups.size(); k++) { 
590                                 sums[k] += abs(t->tree[index].getBranchLength());       
591                         }
592                 }
593         
594         
595         index = t->tree[index].getParent();     
596         
597                 //while you aren't at root
598                 while(t->tree[index].getParent() != -1){
599             
600                         if (m->control_pressed) {  return sums; }
601                         
602                         for (int k = 0; k < groups.size(); k++) {
603                 
604                 if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
605                 
606                 if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
607                     if (t->tree[index].getBranchLength() != -1) {
608                         sums[k] += abs(t->tree[index].getBranchLength());
609                     }
610                     counted[index][groups[k]] = true;
611                 }
612             }
613             index = t->tree[index].getParent(); 
614         }
615         
616                 return sums;
617         
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
621                 exit(1);
622         }
623 }
624 //**********************************************************************************************************************
625 map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
626         try {
627                 map<string, int> roots; //maps group to root for group, may not be root of tree
628                 map<string, bool> done;
629        
630                 //initialize root for all groups to -1
631                 for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
632         
633         for (int i = 0; i < t->getNumLeaves(); i++) {
634             
635             vector<string> groups = t->tree[i].getGroup();
636             
637             int index = t->tree[i].getParent();
638             
639             for (int j = 0; j < groups.size(); j++) {
640                 
641                 if (done[groups[j]] == false) { //we haven't found the root for this group yet
642                     
643                     done[groups[j]] = true;
644                     roots[groups[j]] = i; //set root to self to start
645                     
646                     //while you aren't at root
647                     while(t->tree[index].getParent() != -1){
648                         
649                         if (m->control_pressed) {  return roots; }
650                         
651                         //do both your chidren have have descendants from the users groups? 
652                         int lc = t->tree[index].getLChild();
653                         int rc = t->tree[index].getRChild();
654                         
655                         int LpcountSize = 0;
656                         map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
657                         if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
658                         
659                         int RpcountSize = 0;
660                         itGroup = t->tree[rc].pcount.find(groups[j]);
661                         if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
662                         
663                         if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
664                             roots[groups[j]] = index;
665                         }else { ;}
666                         
667                         index = t->tree[index].getParent();     
668                     }
669                 }
670             }
671         }
672         
673         return roots;
674         
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
678                 exit(1);
679         }
680 }
681 //**********************************************************************************************************************
682
683
684