]> git.donarmstrong.com Git - mothur.git/blob - phylodiversitycommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / phylodiversitycommand.cpp
1 /*
2  *  phylodiversitycommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylodiversitycommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> PhyloDiversityCommand::setParameters(){  
14         try {
15
16                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter prarefy("rarefy", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prarefy);
24                 CommandParameter psummary("summary", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(psummary);
25                 CommandParameter pcollect("collect", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pcollect);
26                 CommandParameter pscale("scale", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pscale);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PhyloDiversityCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PhyloDiversityCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
44                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
45                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
46                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
47                 helpString += "The scale parameter is used indicate that you want your output scaled to the number of sequences sampled, default = false. \n";
48                 helpString += "The rarefy parameter allows you to create a rarefaction curve. The default is false.\n";
49                 helpString += "The collect parameter allows you to create a collectors curve. The default is false.\n";
50                 helpString += "The summary parameter allows you to create a .summary file. The default is true.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The phylo.diversity command should be in the following format: phylo.diversity(groups=yourGroups, rarefy=yourRarefy, iters=yourIters).\n";
53                 helpString += "Example phylo.diversity(groups=A-B-C, rarefy=T, iters=500).\n";
54                 helpString += "The phylo.diversity command output two files: .phylo.diversity and if rarefy=T, .rarefaction.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56                 return helpString;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "PhyloDiversityCommand", "getHelpString");
60                 exit(1);
61         }
62 }
63
64
65 //**********************************************************************************************************************
66 PhyloDiversityCommand::PhyloDiversityCommand(){ 
67         try {
68                 abort = true; calledHelp = true; 
69                 setParameters();
70                 vector<string> tempOutNames;
71                 outputTypes["phylodiv"] = tempOutNames;
72                 outputTypes["rarefy"] = tempOutNames;
73                 outputTypes["summary"] = tempOutNames;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81 PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
82         try {
83                 abort = false; calledHelp = false;   
84                 
85                 //allow user to run help
86                 if(option == "help") { help(); abort = true; calledHelp = true; }
87                 
88                 else {
89                         vector<string> myArray = setParameters();;
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         map<string,string>::iterator it;
94                         
95                         ValidParameters validParameter;
96                 
97                         //check to make sure all parameters are valid for command
98                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["phylodiv"] = tempOutNames;
105                         outputTypes["rarefy"] = tempOutNames;
106                         outputTypes["summary"] = tempOutNames;
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("tree");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
119                                 }
120                                 
121                                 it = parameters.find("group");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
127                                 }
128                                 
129                                 it = parameters.find("name");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
135                                 }
136                         }
137                         
138                         m->runParse = true;
139                         
140                         //check for required parameters
141                         treefile = validParameter.validFile(parameters, "tree", true);
142                         if (treefile == "not open") { abort = true; }
143                         else if (treefile == "not found") {                             
144                                 //if there is a current design file, use it
145                                 treefile = m->getTreeFile(); 
146                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
148                         }       
149                         
150                         //check for required parameters
151                         groupfile = validParameter.validFile(parameters, "group", true);
152                         if (groupfile == "not open") { abort = true; }
153                         else if (groupfile == "not found") { 
154                                 //if there is a current design file, use it
155                                 groupfile = m->getGroupFile(); 
156                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
157                                 else {  m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; }                                                             
158                         }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         
164                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
165                         
166                         string temp;
167                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
168                         convert(temp, freq); 
169                         
170                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
171                         convert(temp, iters); 
172                         
173                         temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
174                         rarefy = m->isTrue(temp);
175                         if (!rarefy) { iters = 1;  }
176                         
177                         temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
178                         summary = m->isTrue(temp);
179                         
180                         temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
181                         scale = m->isTrue(temp);
182                         
183                         temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
184                         collect = m->isTrue(temp);
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         convert(temp, processors); 
189                         
190                         groups = validParameter.validFile(parameters, "groups", false);                 
191                         if (groups == "not found") { groups = "";  }
192                         else { 
193                                 m->splitAtDash(groups, Groups);
194                                 m->Groups = Groups;
195                         }
196                         
197                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
198                 }
199                 
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "PhyloDiversityCommand", "PhyloDiversityCommand");
203                 exit(1);
204         }                       
205 }
206 //**********************************************************************************************************************
207
208 int PhyloDiversityCommand::execute(){
209         try {
210                 
211                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
212                 
213                 //read in group map info.
214                 tmap = new TreeMap(groupfile);
215                 tmap->readMap();
216                 
217                 if (namefile != "") { readNamesFile(); }
218                 
219                 read = new ReadNewickTree(treefile);
220                 int readOk = read->read(tmap); 
221                 
222                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
223                 
224                 read->AssembleTrees();
225                 vector<Tree*> trees = read->getTrees();
226                 delete read;
227                 
228                 //make sure all files match
229                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
230                 int numNamesInTree;
231                 if (namefile != "")  {  
232                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
233                         else {   numNamesInTree = m->Treenames.size();  }
234                 }else {  numNamesInTree = m->Treenames.size();  }
235                 
236                 
237                 //output any names that are in group file but not in tree
238                 if (numNamesInTree < tmap->getNumSeqs()) {
239                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
240                                 //is that name in the tree?
241                                 int count = 0;
242                                 for (int j = 0; j < m->Treenames.size(); j++) {
243                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
244                                         count++;
245                                 }
246                                 
247                                 if (m->control_pressed) { 
248                                         delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
249                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
250                                         m->Groups.clear();
251                                         return 0;
252                                 }
253                                 
254                                 //then you did not find it so report it 
255                                 if (count == m->Treenames.size()) { 
256                                         //if it is in your namefile then don't remove
257                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
258                                         
259                                         if (it == nameMap.end()) {
260                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
261                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
262                                                 i--; //need this because removeSeq removes name from namesOfSeqs
263                                         }
264                                 }
265                         }
266                 }
267                 
268                 SharedUtil* util = new SharedUtil();
269                 util->setGroups(m->Groups, tmap->namesOfGroups, "treegroup");   //sets the groups the user wants to analyze
270                 delete util;
271                 
272                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
273                 for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i);  break; }  }
274                  
275                 vector<string> outputNames;
276                 
277                 //for each of the users trees
278                 for(int i = 0; i < trees.size(); i++) {
279                 
280                         if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str());         } return 0; }
281                         
282                         ofstream outSum, outRare, outCollect;
283                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.summary";
284                         string outRareFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv.rarefaction";
285                         string outCollectFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + ".phylodiv";
286                         
287                         if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);             outputTypes["summary"].push_back(outSumFile);                   }
288                         if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);  outputTypes["rarefy"].push_back(outRareFile);                   }
289                         if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);  outputTypes["phylodiv"].push_back(outCollectFile);  }
290                         
291                         int numLeafNodes = trees[i]->getNumLeaves();
292                         
293                         //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
294                         vector<int> randomLeaf;
295                         for (int j = 0; j < numLeafNodes; j++) {  
296                                 if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
297                                         randomLeaf.push_back(j); 
298                                 }
299                         }
300                         
301                         numLeafNodes = randomLeaf.size();  //reset the number of leaf nodes you are using 
302                         
303                         //each group, each sampling, if no rarefy iters = 1;
304                         map<string, vector<float> > diversity;
305                         
306                         //each group, each sampling, if no rarefy iters = 1;
307                         map<string, vector<float> > sumDiversity;
308                         
309                         //find largest group total 
310                         int largestGroup = 0;
311                         for (int j = 0; j < m->Groups.size(); j++) {  
312                                 if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
313                                 
314                                 //initialize diversity
315                                 diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);                //numSampled
316                                                                                                                                                                                                                         //groupA                0.0                     0.0
317                                                                                                                                                                                                                         
318                                 //initialize sumDiversity
319                                 sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
320                         }       
321
322                         //convert freq percentage to number
323                         int increment = 100;
324                         if (freq < 1.0) {  increment = largestGroup * freq;  
325                         }else { increment = freq;  }
326                         
327                         //initialize sampling spots
328                         set<int> numSampledList;
329                         for(int k = 1; k <= largestGroup; k++){  if((k == 1) || (k % increment == 0)){  numSampledList.insert(k); }   }
330                         if(largestGroup % increment != 0){      numSampledList.insert(largestGroup);   }
331                         
332                         //add other groups ending points
333                         for (int j = 0; j < m->Groups.size(); j++) {  
334                                 if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) {  numSampledList.insert(diversity[m->Groups[j]].size()-1); }
335                         }
336                         
337                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
338                                 if(processors == 1){
339                                         driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
340                                 }else{
341                                         if (rarefy) {
342                                                 vector<int> procIters;
343                                                 
344                                                 int numItersPerProcessor = iters / processors;
345                                                 
346                                                 //divide iters between processes
347                                                 for (int h = 0; h < processors; h++) {
348                                                         if(h == processors - 1){
349                                                                 numItersPerProcessor = iters - h * numItersPerProcessor;
350                                                         }
351                                                         procIters.push_back(numItersPerProcessor);
352                                                 }
353                                                 
354                                                 createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum); 
355                                                 
356                                         }else{ //no need to paralellize if you dont want to rarefy
357                                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
358                                         }
359                                 }
360
361                         #else
362                                 driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);      
363                         #endif
364
365                         if (rarefy) {   printData(numSampledList, sumDiversity, outRare, iters);        }
366                 }
367                 
368         
369                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
370
371                 m->mothurOutEndLine();
372                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
373                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
374                 m->mothurOutEndLine();
375
376                 
377                 return 0;
378         }
379         catch(exception& e) {
380                 m->errorOut(e, "PhyloDiversityCommand", "execute");
381                 exit(1);
382         }
383 }
384 //**********************************************************************************************************************
385 int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
386         try {
387                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
388                 int process = 1;
389                 
390                 vector<int> processIDS;
391                 map< string, vector<float> >::iterator itSum;
392                 
393                 //loop through and create all the processes you want
394                 while (process != processors) {
395                         int pid = fork();
396                         
397                         if (pid > 0) {
398                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
399                                 process++;
400                         }else if (pid == 0){
401                                 driver(t, div, sumDiv, procIters[process], increment, randomLeaf, numSampledList, outCollect, outSum, false);
402                                 
403                                 string outTemp = outputDir + toString(getpid()) + ".sumDiv.temp";
404                                 ofstream out;
405                                 m->openOutputFile(outTemp, out);
406                                 
407                                 //output the sumDIversity
408                                 for (itSum = sumDiv.begin(); itSum != sumDiv.end(); itSum++) {
409                                         out << itSum->first << '\t' << (itSum->second).size() << '\t';
410                                         for (int k = 0; k < (itSum->second).size(); k++) { 
411                                                 out << (itSum->second)[k] << '\t';
412                                         }
413                                         out << endl;
414                                 }
415                                 
416                                 out.close();
417                                 
418                                 exit(0);
419                         }else { 
420                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
421                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
422                                 exit(0);
423                         }
424                 }
425                 
426                 driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
427                 
428                 //force parent to wait until all the processes are done
429                 for (int i=0;i<(processors-1);i++) { 
430                         int temp = processIDS[i];
431                         wait(&temp);
432                 }
433                 
434                 //get data created by processes
435                 for (int i=0;i<(processors-1);i++) { 
436                         
437                         //input the sumDIversity
438                         string inTemp = outputDir + toString(processIDS[i]) + ".sumDiv.temp";
439                         ifstream in;
440                         m->openInputFile(inTemp, in);
441                                 
442                         //output the sumDIversity
443                         for (int j = 0; j < sumDiv.size(); j++) { 
444                                 string group = "";
445                                 int size = 0;
446                                 
447                                 in >> group >> size; m->gobble(in);
448                                 
449                                 for (int k = 0; k < size; k++) { 
450                                         float tempVal;
451                                         in >> tempVal;
452                                         
453                                         sumDiv[group][k] += tempVal;
454                                 }
455                                 m->gobble(in);
456                         }
457                                 
458                         in.close();
459                         remove(inTemp.c_str());
460                 }
461                 
462 #endif
463
464         return 0;               
465         
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "PhyloDiversityCommand", "createProcesses");
469                 exit(1);
470         }
471 }
472 //**********************************************************************************************************************
473 int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
474         try {
475                 int numLeafNodes = randomLeaf.size();
476         
477                 for (int l = 0; l < numIters; l++) {
478                                 random_shuffle(randomLeaf.begin(), randomLeaf.end());
479                 
480                                 //initialize counts
481                                 map<string, int> counts;
482                                 map< string, set<int> > countedBranch;  
483                                 for (int j = 0; j < m->Groups.size(); j++) {  counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
484                                 
485                                 for(int k = 0; k < numLeafNodes; k++){
486                                                 
487                                         if (m->control_pressed) { return 0; }
488                                         
489                                         //calc branch length of randomLeaf k
490                                         vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
491                         
492                                         //for each group in the groups update the total branch length accounting for the names file
493                                         vector<string> groups = t->tree[randomLeaf[k]].getGroup();
494                                         
495                                         for (int j = 0; j < groups.size(); j++) {
496                                                 int numSeqsInGroupJ = 0;
497                                                 map<string, int>::iterator it;
498                                                 it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
499                                                 if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
500                                                         numSeqsInGroupJ = it->second;
501                                                 }
502                                                 
503                                                 if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
504                                                 
505                                                 for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
506                                                         div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
507                                                 }
508                                                 counts[groups[j]] += numSeqsInGroupJ;
509                                         }
510                                 }
511                                 
512                                 if (rarefy) {
513                                         //add this diversity to the sum
514                                         for (int j = 0; j < m->Groups.size(); j++) {  
515                                                 for (int g = 0; g < div[m->Groups[j]].size(); g++) {
516                                                         sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
517                                                 }
518                                         }
519                                 }
520                                 
521                                 if ((collect) && (l == 0) && doSumCollect) {  printData(numSampledList, div, outCollect, 1);  }
522                                 if ((summary) && (l == 0) && doSumCollect) {  printSumData(div, outSum, 1);  }
523                         }
524                         
525                         return 0;
526
527         }
528         catch(exception& e) {
529                 m->errorOut(e, "PhyloDiversityCommand", "driver");
530                 exit(1);
531         }
532 }
533
534 //**********************************************************************************************************************
535
536 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
537         try {
538                 
539                 out << "Groups\tnumSampled\tphyloDiversity" << endl;
540                 
541                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
542                         
543                 for (int j = 0; j < m->Groups.size(); j++) {
544                         int numSampled = (div[m->Groups[j]].size()-1);
545                         out << m->Groups[j] << '\t' << numSampled << '\t';
546                 
547                          
548                         float score;
549                         if (scale)      {  score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;       }
550                         else            {       score = div[m->Groups[j]][numSampled] / (float)numIters;        }
551                                 
552                         out << setprecision(4) << score << endl;
553                 }
554                                         
555                 out.close();
556                 
557         }
558         catch(exception& e) {
559                 m->errorOut(e, "PhyloDiversityCommand", "printSumData");
560                 exit(1);
561         }
562 }
563 //**********************************************************************************************************************
564
565 void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float> >& div, ofstream& out, int numIters){
566         try {
567                 
568                 out << "numSampled\t";
569                 for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t';  }
570                 out << endl;
571                 
572                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
573                 
574                 for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
575                         int numSampled = *it;
576                         
577                         out << numSampled << '\t';  
578                         
579                         for (int j = 0; j < m->Groups.size(); j++) {
580                                 if (numSampled < div[m->Groups[j]].size()) { 
581                                         float score;
582                                         if (scale)      {  score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;       }
583                                         else            {       score = div[m->Groups[j]][numSampled] / (float)numIters;        }
584
585                                         out << setprecision(4) << score << '\t';
586                                 }else { out << "NA" << '\t'; }
587                         }
588                         out << endl;
589                 }
590                 
591                 out.close();
592                 
593         }
594         catch(exception& e) {
595                 m->errorOut(e, "PhyloDiversityCommand", "printData");
596                 exit(1);
597         }
598 }
599 //**********************************************************************************************************************
600 //need a vector of floats one branch length for every group the node represents.
601 vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
602         try {
603
604                 //calc the branch length
605                 //while you aren't at root
606                 vector<float> sums; 
607                 int index = leaf;
608                 
609                 vector<string> groups = t->tree[leaf].getGroup();
610                 sums.resize(groups.size(), 0.0);
611                 
612                 map<string, map<int, double> > tempTotals; //maps node to total Branch Length
613                 map< string, set<int> > tempCounted;
614                 set<int>::iterator it;
615         
616                 //you are a leaf
617                 if(t->tree[index].getBranchLength() != -1){     
618                         for (int k = 0; k < groups.size(); k++) { 
619                                 sums[k] += abs(t->tree[index].getBranchLength());       
620                                 counted[groups[k]].insert(index);
621                         }
622                 }
623                 
624                 for (int k = 0; k < groups.size(); k++) { 
625                         tempTotals[groups[k]][index] = 0.0;     
626                 }
627                 
628                 index = t->tree[index].getParent();     
629                         
630                 //while you aren't at root
631                 while(t->tree[index].getParent() != -1){
632
633                         if (m->control_pressed) {  return sums; }
634                         
635                         int pcountSize = 0;     
636                         for (int k = 0; k < groups.size(); k++) {
637                                 map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
638                                 if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
639                         
640                                 //do both your chidren have have descendants from the users groups? 
641                                 int lc = t->tree[index].getLChild();
642                                 int rc = t->tree[index].getRChild();
643                         
644                                 int LpcountSize = 0;
645                                 itGroup = t->tree[lc].pcount.find(groups[k]);
646                                 if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
647                                                         
648                                 int RpcountSize = 0;
649                                 itGroup = t->tree[rc].pcount.find(groups[k]);
650                                 if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
651                                                                 
652                                 //if yes, add your childrens tempTotals
653                                 if ((LpcountSize != 0) && (RpcountSize != 0)) {
654                                         sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
655                                         
656                                         for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
657
658                                         //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
659                                         if (t->tree[index].getBranchLength() != -1) {
660                                                 if (counted[groups[k]].count(index) == 0) {
661                                                         tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
662                                                         tempCounted[groups[k]].insert(index);
663                                                 }else{
664                                                         tempTotals[groups[k]][index] = 0.0;
665                                                 }
666                                         }else {
667                                                 tempTotals[groups[k]][index] = 0.0;
668                                         }
669                                 }else { //if no, your tempTotal is your childrens temp totals + your branch length
670                                         tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
671                                                                         
672                                         if (counted[groups[k]].count(index) == 0) {
673                                                 tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
674                                                 tempCounted[groups[k]].insert(index);
675                                         }
676
677                                 }
678                                 //cout << "temptotal = "<< tempTotals[i] << endl;
679                         }
680                         
681                         index = t->tree[index].getParent();     
682                 }
683
684                 return sums;
685
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
689                 exit(1);
690         }
691 }
692 /*****************************************************************/
693 int PhyloDiversityCommand::readNamesFile() {
694         try {
695                 m->names.clear();
696                 numUniquesInName = 0;
697                 
698                 ifstream in;
699                 m->openInputFile(namefile, in);
700                 
701                 string first, second;
702                 map<string, string>::iterator itNames;
703                 
704                 while(!in.eof()) {
705                         in >> first >> second; m->gobble(in);
706                         
707                         numUniquesInName++;
708                         
709                         itNames = m->names.find(first);
710                         if (itNames == m->names.end()) {  
711                                 m->names[first] = second; 
712                                 
713                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
714                                 vector<string> dupNames;
715                                 m->splitAtComma(second, dupNames);
716                                 
717                                 for (int i = 0; i < dupNames.size(); i++) {     
718                                         nameMap[dupNames[i]] = dupNames[i]; 
719                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
720                                 }
721                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
722                 }
723                 in.close();
724                 
725                 return 0;
726         }
727         catch(exception& e) {
728                 m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
729                 exit(1);
730         }
731 }
732
733 //**********************************************************************************************************************
734
735
736