]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
Revert to previous commit
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> PhyloDiversityCommand::setParameters(){  
15         try {
16
17                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
25                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
26                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
27                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string PhyloDiversityCommand::getHelpString(){  
42         try {
43                 string helpString = "";
44                 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
45                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
46                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
47                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
48                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
49                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
50                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
51                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
52                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
54                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
55                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
56                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64
65
66 //**********************************************************************************************************************
67 PhyloDiversityCommand::PhyloDiversityCommand(){ 
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["phylodiv"] = tempOutNames;
73                 outputTypes["rarefy"] = tempOutNames;
74                 outputTypes["summary"] = tempOutNames;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
83         try {
84                 abort = false; calledHelp = false;   
85                 
86                 //allow user to run help
87                 if(option == "help") { help(); abort = true; calledHelp = true; }
88                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89                 
90                 else {
91                         vector<string> myArray = setParameters();;
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         map<string,string>::iterator it;
96                         
97                         ValidParameters validParameter;
98                 
99                         //check to make sure all parameters are valid for command
100                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["phylodiv"] = tempOutNames;
107                         outputTypes["rarefy"] = tempOutNames;
108                         outputTypes["summary"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("tree");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("group");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
129                                 }
130                                 
131                                 it = parameters.find("name");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
137                                 }
138                         }
139                         
140                         //check for required parameters
141                         treefile = validParameter.validFile(parameters, "tree", true);
142                         if (treefile == "not open") { treefile = ""; abort = true; }
143                         else if (treefile == "not found") {                             
144                                 //if there is a current design file, use it
145                                 treefile = m->getTreeFile(); 
146                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
148                         }else { m->setTreeFile(treefile); }     
149                         
150                         //check for required parameters
151                         groupfile = validParameter.validFile(parameters, "group", true);
152                         if (groupfile == "not open") { groupfile = ""; abort = true; }
153                         else if (groupfile == "not found") { groupfile = ""; }
154                         else { m->setGroupFile(groupfile); }
155                         
156                         namefile = validParameter.validFile(parameters, "name", true);
157                         if (namefile == "not open") { namefile = ""; abort = true; }
158                         else if (namefile == "not found") { namefile = ""; }
159                         else { m->setNameFile(namefile); }
160                         
161                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
162                         
163                         string temp;
164                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
165                         m->mothurConvert(temp, freq); 
166                         
167                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
168                         m->mothurConvert(temp, iters); 
169                         
170                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
171                         rarefy = m->isTrue(temp);
172                         if (!rarefy) { iters = 1;  }
173                         
174                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
175                         summary = m->isTrue(temp);
176                         
177                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
178                         scale = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
181                         collect = m->isTrue(temp);
182                         
183                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
184                         m->setProcessors(temp);
185                         m->mothurConvert(temp, processors); 
186                         
187                         groups = validParameter.validFile(parameters, "groups", false);                 
188                         if (groups == "not found") { groups = "";  }
189                         else { 
190                                 m->splitAtDash(groups, Groups);
191                                 m->setGroups(Groups);
192                         }
193                         
194                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
195                         
196                         if (namefile == "") {
197                                 vector<string> files; files.push_back(treefile);
198                                 parser.getNameFile(files);
199                         }
200                 }
201                 
202         }
203         catch(exception& e) {
204                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
205                 exit(1);
206         }                       
207 }
208 //**********************************************************************************************************************
209
210 int PhyloDiversityCommand::execute(){
211         try {
212                 
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214                 
215                 m->setTreeFile(treefile);
216         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
217         vector<Tree*> trees = reader->getTrees();
218         tmap = trees[0]->getTreeMap();
219         delete reader;
220
221                 SharedUtil util;
222                 vector<string> mGroups = m->getGroups();
223                 vector<string> tGroups = tmap->getNamesOfGroups();
224                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
225                 
226                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
227                 for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i);  break; }  }
228                 m->setGroups(mGroups);
229                  
230                 vector<string> outputNames;
231                 
232                 //for each of the users trees
233                 for(int i = 0; i < trees.size(); i++) {
234                 
235                         if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0; }
236                         
237                         ofstream outSum, outRare, outCollect;
238                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.summary";
239                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.rarefaction";
240                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv";
241                         
242                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
243                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
244                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
245                         
246                         int numLeafNodes = trees[i]->getNumLeaves();
247                         
248                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
249                         vector<int> randomLeaf;
250                         for (int j = 0; j < numLeafNodes; j++) {  
251                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
252                                         randomLeaf.push_back(j); 
253                                 }
254                         }
255                         
256                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
257                         
258                         //each group, each sampling, if no rarefy iters = 1;
259                         map<string, vector<float> > diversity;
260                         
261                         //each group, each sampling, if no rarefy iters = 1;
262                         map<string, vector<float> > sumDiversity;
263                         
264                         //find largest group total 
265                         int largestGroup = 0;
266                         for (int j = 0; j < mGroups.size(); j++) {  
267                                 if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
268                                 
269                                 //initialize diversity
270                                 diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);            //numSampled
271                                                                                                                                                                                                                         //groupA                0.0                     0.0
272                                                                                                                                                                                                                         
273                                 //initialize sumDiversity
274                                 sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
275                         }       
276
277                         //convert freq percentage to number
278                         int increment = 100;
279                         if (freq < 1.0) {  increment = largestGroup * freq;  
280                         }else { increment = freq;  }
281                         
282                         //initialize sampling spots
283                         set<int> numSampledList;
284                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
285                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
286                         
287                         //add other groups ending points
288                         for (int j = 0; j < mGroups.size(); j++) {  
289                                 if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) {  numSampledList.insert(diversity[mGroups[j]].size()-1); }
290                         }
291                         
292                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
293                                 if(processors == 1){
294                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
295                                 }else{
296                                         if (rarefy) {
297                                                 vector<int> procIters;
298                                                 
299                                                 int numItersPerProcessor = iters / processors;
300                                                 
301                                                 //divide iters between processes
302                                                 for (int h = 0; h < processors; h++) {
303                                                         if(h == processors - 1){
304                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
305                                                         }
306                                                         procIters.push_back(numItersPerProcessor);
307                                                 }
308                                                 
309                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
310                                                 
311                                         }else{ //no need to paralellize if you dont want to rarefy
312                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
313                                         }
314                                 }
315
316                         #else
317                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
318                         #endif
319
320                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
321                 }
322                 
323         
324                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
325
326                 m->mothurOutEndLine();
327                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
328                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
329                 m->mothurOutEndLine();
330
331                 
332                 return 0;
333         }
334         catch(exception& e) {
335                 m->errorOut(e, "PhyloDiversityCommand", "execute");
336                 exit(1);
337         }
338 }
339 //**********************************************************************************************************************
340 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
341         try {
342                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
343                 int process = 1;
344                 
345                 vector<int> processIDS;
346                 map< string, vector<float> >::iterator itSum;
347                 
348                 //loop through and create all the processes you want
349                 while (process != processors) {
350                         int pid = fork();
351                         
352                         if (pid > 0) {
353                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
354                                 process++;
355                         }else if (pid == 0){
356                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
357                                 
358                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
359                                 ofstream out;
360                                 m->openOutputFile(outTemp, out);
361                                 
362                                 //output the sumDIversity
363                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
364                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
365                                         for (int k = 0; k < (itSum->second).size(); k++) { 
366                                                 out << (itSum->second)[k] << '\t';
367                                         }
368                                         out << endl;
369                                 }
370                                 
371                                 out.close();
372                                 
373                                 exit(0);
374                         }else { 
375                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
376                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
377                                 exit(0);
378                         }
379                 }
380                 
381                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
382                 
383                 //force parent to wait until all the processes are done
384                 for (int i=0;i<(processors-1);i++) { 
385                         int temp = processIDS[i];
386                         wait(&temp);
387                 }
388                 
389                 //get data created by processes
390                 for (int i=0;i<(processors-1);i++) { 
391                         
392                         //input the sumDIversity
393                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
394                         ifstream in;
395                         m->openInputFile(inTemp, in);
396                                 
397                         //output the sumDIversity
398                         for (int j = 0; j < sumDiv.size(); j++) { 
399                                 string group = "";
400                                 int size = 0;
401                                 
402                                 in >> group >> size; m->gobble(in);
403                                 
404                                 for (int k = 0; k < size; k++) { 
405                                         float tempVal;
406                                         in >> tempVal;
407                                         
408                                         sumDiv[group][k] += tempVal;
409                                 }
410                                 m->gobble(in);
411                         }
412                                 
413                         in.close();
414                         m->mothurRemove(inTemp);
415                 }
416                 
417 #endif
418
419         return 0;               
420         
421         }
422         catch(exception& e) {
423                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
424                 exit(1);
425         }
426 }
427 //**********************************************************************************************************************
428 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
429         try {
430                 int numLeafNodes = randomLeaf.size();
431                 vector<string> mGroups = m->getGroups();
432         
433                 for (int l = 0; l < numIters; l++) {
434                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
435                 
436                                 //initialize counts
437                                 map<string, int> counts;
438                                 map< string, set<int> > countedBranch;  
439                                 for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
440                                 
441                                 for(int k = 0; k < numLeafNodes; k++){
442                                                 
443                                         if (m->control_pressed) { return 0; }
444                                         
445                                         //calc branch length of randomLeaf k
446                                         vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
447                         
448                                         //for each group in the groups update the total branch length accounting for the names file
449                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
450                                         
451                                         for (int j = 0; j < groups.size(); j++) {
452                                                 int numSeqsInGroupJ = 0;
453                                                 map<string, int>::iterator it;
454                                                 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
455                                                 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
456                                                         numSeqsInGroupJ = it->second;
457                                                 }
458                                                 
459                                                 if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
460                                                 
461                                                 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
462                                                         div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
463                                                 }
464                                                 counts[groups[j]] += numSeqsInGroupJ;
465                                         }
466                                 }
467                                 
468                                 if (rarefy) {
469                                         //add this diversity to the sum
470                                         for (int j = 0; j < mGroups.size(); j++) {  
471                                                 for (int g = 0; g < div[mGroups[j]].size(); g++) {
472                                                         sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
473                                                 }
474                                         }
475                                 }
476                                 
477                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
478                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
479                         }
480                         
481                         return 0;
482
483         }
484         catch(exception& e) {
485                 m->errorOut(e, "PhyloDiversityCommand", "driver");
486                 exit(1);
487         }
488 }
489
490 //**********************************************************************************************************************
491
492 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
493         try {
494                 
495                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
496                 
497                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
498                 
499                 vector<string> mGroups = m->getGroups();
500                 for (int j = 0; j < mGroups.size(); j++) {
501                         int numSampled = (div[mGroups[j]].size()-1);
502                         out << mGroups[j] << '\t' << numSampled << '\t';
503                 
504                          
505                         float score;
506                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
507                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
508                                 
509                         out << setprecision(4) << score << endl;
510                 }
511                                         
512                 out.close();
513                 
514         }
515         catch(exception& e) {
516                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
517                 exit(1);
518         }
519 }
520 //**********************************************************************************************************************
521
522 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
523         try {
524                 
525                 out << "numSampled\t";
526                 vector<string> mGroups = m->getGroups();
527                 for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t';  }
528                 out << endl;
529                 
530                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
531                 
532                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
533                         int numSampled = *it;
534                         
535                         out << numSampled << '\t';  
536                 
537                         for (int j = 0; j < mGroups.size(); j++) {
538                                 if (numSampled < div[mGroups[j]].size()) { 
539                                         float score;
540                                         if (scale)      {  score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
541                                         else            {       score = div[mGroups[j]][numSampled] / (float)numIters;  }
542
543                                         out << setprecision(4) << score << '\t';
544                                 }else { out << "NA" << '\t'; }
545                         }
546                         out << endl;
547                 }
548                 
549                 out.close();
550                 
551         }
552         catch(exception& e) {
553                 m->errorOut(e, "PhyloDiversityCommand", "printData");
554                 exit(1);
555         }
556 }
557 //**********************************************************************************************************************
558 //need a vector of floats one branch length for every group the node represents.
559 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
560         try {
561
562                 //calc the branch length
563                 //while you aren't at root
564                 vector<float> sums; 
565                 int index = leaf;
566                 
567                 vector<string> groups = t->tree[leaf].getGroup();
568                 sums.resize(groups.size(), 0.0);
569                 
570                 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
571                 map< string, set<int> > tempCounted;
572                 set<int>::iterator it;
573         
574                 //you are a leaf
575                 if(t->tree[index].getBranchLength() != -1){     
576                         for (int k = 0; k < groups.size(); k++) { 
577                                 sums[k] += abs(t->tree[index].getBranchLength());       
578                                 counted[groups[k]].insert(index);
579                         }
580                 }
581                 
582                 for (int k = 0; k < groups.size(); k++) { 
583                         tempTotals[groups[k]][index] = 0.0;     
584                 }
585                 
586                 index = t->tree[index].getParent();     
587                         
588                 //while you aren't at root
589                 while(t->tree[index].getParent() != -1){
590
591                         if (m->control_pressed) {  return sums; }
592                         
593                         int pcountSize = 0;     
594                         for (int k = 0; k < groups.size(); k++) {
595                                 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
596                                 if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
597                         
598                                 //do both your chidren have have descendants from the users groups? 
599                                 int lc = t->tree[index].getLChild();
600                                 int rc = t->tree[index].getRChild();
601                         
602                                 int LpcountSize = 0;
603                                 itGroup = t->tree[lc].pcount.find(groups[k]);
604                                 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
605                                                         
606                                 int RpcountSize = 0;
607                                 itGroup = t->tree[rc].pcount.find(groups[k]);
608                                 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
609                                                                 
610                                 //if yes, add your childrens tempTotals
611                                 if ((LpcountSize != 0) && (RpcountSize != 0)) {
612                                         sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
613                                         
614                                         for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
615
616                                         //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
617                                         if (t->tree[index].getBranchLength() != -1) {
618                                                 if (counted[groups[k]].count(index) == 0) {
619                                                         tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
620                                                         tempCounted[groups[k]].insert(index);
621                                                 }else{
622                                                         tempTotals[groups[k]][index] = 0.0;
623                                                 }
624                                         }else {
625                                                 tempTotals[groups[k]][index] = 0.0;
626                                         }
627                                 }else { //if no, your tempTotal is your childrens temp totals + your branch length
628                                         tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
629                                                                         
630                                         if (counted[groups[k]].count(index) == 0) {
631                                                 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
632                                                 tempCounted[groups[k]].insert(index);
633                                         }
634
635                                 }
636                                 //cout << "temptotal = "<< tempTotals[i] << endl;
637                         }
638                         
639                         index = t->tree[index].getParent();     
640                 }
641
642                 return sums;
643
644         }
645         catch(exception& e) {
646                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
647                 exit(1);
648         }
649 }
650 //**********************************************************************************************************************
651
652
653