]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::getValidParameters(){    
14         try {
15                 string Array[] =  {"groups","iters","distance","random","processors","root","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "UnifracWeightedCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 UnifracWeightedCommand::UnifracWeightedCommand(){       
26         try {
27                 abort = true; calledHelp = true; 
28                 vector<string> tempOutNames;
29                 outputTypes["weighted"] = tempOutNames;
30                 outputTypes["wsummary"] = tempOutNames;
31                 outputTypes["phylip"] = tempOutNames;
32                 outputTypes["column"] = tempOutNames;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> UnifracWeightedCommand::getRequiredParameters(){ 
41         try {
42                 vector<string> myArray;
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "UnifracWeightedCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> UnifracWeightedCommand::getRequiredFiles(){      
52         try {
53                 string Array[] =  {"tree","group"};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "UnifracWeightedCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 /***********************************************************/
64 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 abort = false; calledHelp = false;   
68                 Groups.clear();
69                         
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; calledHelp = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"groups","iters","distance","random","processors","root","outputdir","inputdir"};
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string,string> parameters=parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["weighted"] = tempOutNames;
91                         outputTypes["wsummary"] = tempOutNames;
92                         outputTypes["phylip"] = tempOutNames;
93                         outputTypes["column"] = tempOutNames;
94                         
95                         if (globaldata->gTree.size() == 0) {//no trees were read
96                                 m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.weighted command."); m->mothurOutEndLine(); abort = true;  }
97                         
98                         //if the user changes the output directory command factory will send this info to us in the output parameter 
99                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
100                                 outputDir = ""; 
101                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
102                         }
103                                                                                                                                         
104                         //check for optional parameter and set defaults
105                         // ...at some point should added some additional type checking...
106                         groups = validParameter.validFile(parameters, "groups", false);                 
107                         if (groups == "not found") { groups = ""; }
108                         else { 
109                                 m->splitAtDash(groups, Groups);
110                                 globaldata->Groups = Groups;
111                         }
112                                 
113                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
114                         convert(itersString, iters); 
115                         
116                         string temp = validParameter.validFile(parameters, "distance", false);                  
117                         if (temp == "not found") { phylip = false; outputForm = ""; }
118                         else{
119                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
120                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
121                         }
122                         
123                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
124                         random = m->isTrue(temp);
125                         
126                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
127                         includeRoot = m->isTrue(temp);
128                         
129                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
130                         convert(temp, processors); 
131                         
132                         if (!random) {  iters = 0;  } //turn off random calcs
133
134                         
135                         if (abort == false) {
136                                 T = globaldata->gTree;
137                                 tmap = globaldata->gTreemap;
138                                 sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".wsummary";
139                                 m->openOutputFile(sumFile, outSum);
140                                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
141                                 
142                                 util = new SharedUtil();
143                                 string s; //to make work with setgroups
144                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, s, numGroups, "weighted");     //sets the groups the user wants to analyze
145                                 util->getCombos(groupComb, globaldata->Groups, numComp);
146                                 
147                                 weighted = new Weighted(tmap, includeRoot);
148                                 
149                         }
150                 }
151                 
152                 
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
156                 exit(1);
157         }
158 }
159 //**********************************************************************************************************************
160
161 void UnifracWeightedCommand::help(){
162         try {
163                 m->mothurOut("The unifrac.weighted command can only be executed after a successful read.tree command.\n");
164                 m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance, processors, root and random.  No parameters are required.\n");
165                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
166                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
167                 m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
168                 m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n");
169                 m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n");
170                 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
171                 m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n");
172                 m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n");
173                 m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
174                 m->mothurOut("The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n");
175                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "UnifracWeightedCommand", "help");
179                 exit(1);
180         }
181 }
182
183 /***********************************************************/
184 int UnifracWeightedCommand::execute() {
185         try {
186         
187                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
188                 
189                 int start = time(NULL);
190                 
191                 //get weighted for users tree
192                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
193                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
194                 
195                 if (numComp < processors) { processors = numComp; }
196                                 
197                 //get weighted scores for users trees
198                 for (int i = 0; i < T.size(); i++) {
199                         
200                         if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
201
202                         counter = 0;
203                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
204                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
205                         
206                         if (random) {  
207                                 output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted", itersString);  
208                                 outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted");
209                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted");
210                         } 
211
212                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
213                         
214                         if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());  } return 0; }
215                         
216                         //save users score
217                         for (int s=0; s<numComp; s++) {
218                                 //add users score to vector of user scores
219                                 uScores[s].push_back(userData[s]);
220                                 
221                                 //save users tree score for summary file
222                                 utreeScores.push_back(userData[s]);
223                         }
224                         
225                         if (random) { 
226                         
227                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
228                                 vector< vector<string> > namesOfGroupCombos;
229                                 for (int a=0; a<numGroups; a++) { 
230                                         for (int l = 0; l < a; l++) {   
231                                                 vector<string> groups; groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
232                                                 namesOfGroupCombos.push_back(groups);
233                                         }
234                                 }
235                                 
236                                 lines.clear();
237                                 
238                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
239                                         if(processors != 1){
240                                                 int numPairs = namesOfGroupCombos.size();
241                                                 int numPairsPerProcessor = numPairs / processors;
242                                         
243                                                 for (int i = 0; i < processors; i++) {
244                                                         int startPos = i * numPairsPerProcessor;
245                                                         if(i == processors - 1){
246                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
247                                                         }
248                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
249                                                 }
250                                         }
251                                 #endif
252
253                                 
254                                 //get scores for random trees
255                                 for (int j = 0; j < iters; j++) {
256                                 
257                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
258                                                 if(processors == 1){
259                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
260                                                 }else{
261                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
262                                                 }
263                                         #else
264                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
265                                         #endif
266                                         
267                                         if (m->control_pressed) { delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str());  } return 0; }
268                                         
269                                         //report progress
270 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
271                                 }
272                                 lines.clear();
273                         
274                                 //find the signifigance of the score for summary file
275                                 for (int f = 0; f < numComp; f++) {
276                                         //sort random scores
277                                         sort(rScores[f].begin(), rScores[f].end());
278                                         
279                                         //the index of the score higher than yours is returned 
280                                         //so if you have 1000 random trees the index returned is 100 
281                                         //then there are 900 trees with a score greater then you. 
282                                         //giving you a signifigance of 0.900
283                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
284                                         
285                                         //the signifigance is the number of trees with the users score or higher 
286                                         WScoreSig.push_back((iters-index)/(float)iters);
287                                 }
288                                 
289                                 //out << "Tree# " << i << endl;
290                                 calculateFreqsCumuls();
291                                 printWeightedFile();
292                                 
293                                 delete output;
294                         
295                         }
296                         
297                         //clear data
298                         rScores.clear();
299                         uScores.clear();
300                         validScores.clear();
301                 }
302                 
303                 
304                 if (m->control_pressed) { outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0;  }
305                 
306                 printWSummaryFile();
307                 
308                 if (phylip) {   createPhylipFile();             }
309
310                 //clear out users groups
311                 globaldata->Groups.clear();
312                 
313                 
314                 if (m->control_pressed) { 
315                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
316                         return 0; 
317                 }
318                 
319                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
320                 
321                 //set phylip file as new current phylipfile
322                 string current = "";
323                 itTypes = outputTypes.find("phylip");
324                 if (itTypes != outputTypes.end()) {
325                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
326                 }
327                 
328                 //set column file as new current columnfile
329                 itTypes = outputTypes.find("column");
330                 if (itTypes != outputTypes.end()) {
331                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
332                 }
333                 
334                 m->mothurOutEndLine();
335                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
337                 m->mothurOutEndLine();
338                 
339                 return 0;
340                 
341         }
342         catch(exception& e) {
343                 m->errorOut(e, "UnifracWeightedCommand", "execute");
344                 exit(1);
345         }
346 }
347 /**************************************************************************************************/
348
349 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
350         try {
351 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
352                 int process = 1;
353                 vector<int> processIDS;
354                 
355                 EstOutput results;
356                 
357                 //loop through and create all the processes you want
358                 while (process != processors) {
359                         int pid = fork();
360                         
361                         if (pid > 0) {
362                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
363                                 process++;
364                         }else if (pid == 0){
365                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
366                         
367                                 //pass numSeqs to parent
368                                 ofstream out;
369                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
370                                 m->openOutputFile(tempFile, out);
371                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
372                                 out.close();
373                                 
374                                 exit(0);
375                         }else { 
376                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
377                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
378                                 exit(0);
379                         }
380                 }
381                 
382                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
383                 
384                 //force parent to wait until all the processes are done
385                 for (int i=0;i<(processors-1);i++) { 
386                         int temp = processIDS[i];
387                         wait(&temp);
388                 }
389                 
390                 //get data created by processes
391                 for (int i=0;i<(processors-1);i++) { 
392         
393                         ifstream in;
394                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
395                         m->openInputFile(s, in);
396                         
397                         double tempScore;
398                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
399                         in.close();
400                         remove(s.c_str());
401                 }
402                 
403                 return 0;
404 #endif          
405         }
406         catch(exception& e) {
407                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
408                 exit(1);
409         }
410 }
411
412 /**************************************************************************************************/
413 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
414  try {
415                 Tree* randT = new Tree();
416
417                 for (int h = start; h < (start+num); h++) {
418         
419                         if (m->control_pressed) { return 0; }
420                 
421                         //initialize weighted score
422                         string groupA = namesOfGroupCombos[h][0]; 
423                         string groupB = namesOfGroupCombos[h][1];
424                         
425                         //copy T[i]'s info.
426                         randT->getCopy(t);
427                          
428                         //create a random tree with same topology as T[i], but different labels
429                         randT->assembleRandomUnifracTree(groupA, groupB);
430                         
431                         if (m->control_pressed) { delete randT;  return 0;  }
432
433                         //get wscore of random tree
434                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
435                 
436                         if (m->control_pressed) { delete randT;  return 0;  }
437                                                                                 
438                         //save scores
439                         scores[h].push_back(randomData[0]);
440                 }
441         
442                 delete randT;
443         
444                 return 0;
445
446         }
447         catch(exception& e) {
448                 m->errorOut(e, "UnifracWeightedCommand", "driver");
449                 exit(1);
450         }
451 }
452 /***********************************************************/
453 void UnifracWeightedCommand::printWeightedFile() {
454         try {
455                 vector<double> data;
456                 vector<string> tags;
457                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
458                 
459                 for(int a = 0; a < numComp; a++) {
460                         output->initFile(groupComb[a], tags);
461                         //print each line
462                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
463                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
464                                 output->output(data);
465                                 data.clear();
466                         } 
467                         output->resetFile();
468                 }
469         }
470         catch(exception& e) {
471                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
472                 exit(1);
473         }
474 }
475
476
477 /***********************************************************/
478 void UnifracWeightedCommand::printWSummaryFile() {
479         try {
480                 //column headers
481                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
482                 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine(); 
483                 
484                 //format output
485                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
486                 
487                 //print each line
488                 int count = 0;
489                 for (int i = 0; i < T.size(); i++) { 
490                         for (int j = 0; j < numComp; j++) {
491                                 if (random) {
492                                         if (WScoreSig[count] > (1/(float)iters)) {
493                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
494                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
495                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
496                                         }else{
497                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
498                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
499                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
500                                         }
501                                 }else{
502                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
503                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
504                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); 
505                                 }
506                                 count++;
507                         }
508                 }
509                 outSum.close();
510         }
511         catch(exception& e) {
512                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
513                 exit(1);
514         }
515 }
516 /***********************************************************/
517 void UnifracWeightedCommand::createPhylipFile() {
518         try {
519                 int count = 0;
520                 //for each tree
521                 for (int i = 0; i < T.size(); i++) { 
522                 
523                         string phylipFileName;
524                         if ((outputForm == "lt") || (outputForm == "square")) {
525                                 phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted.phylip.dist";
526                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
527                         }else { //column
528                                 phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted.column.dist";
529                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
530                         }
531                         
532                         ofstream out;
533                         m->openOutputFile(phylipFileName, out);
534                         
535                         if ((outputForm == "lt") || (outputForm == "square")) {
536                                 //output numSeqs
537                                 out << globaldata->Groups.size() << endl;
538                         }
539
540                         //make matrix with scores in it
541                         vector< vector<float> > dists;  dists.resize(globaldata->Groups.size());
542                         for (int i = 0; i < globaldata->Groups.size(); i++) {
543                                 dists[i].resize(globaldata->Groups.size(), 0.0);
544                         }
545                         
546                         //flip it so you can print it
547                         for (int r=0; r<globaldata->Groups.size(); r++) { 
548                                 for (int l = 0; l < r; l++) {
549                                         dists[r][l] = utreeScores[count];
550                                         dists[l][r] = utreeScores[count];
551                                         count++;
552                                 }
553                         }
554
555                         //output to file
556                         for (int r=0; r<globaldata->Groups.size(); r++) { 
557                                 //output name
558                                 string name = globaldata->Groups[r];
559                                 if (name.length() < 10) { //pad with spaces to make compatible
560                                         while (name.length() < 10) {  name += " ";  }
561                                 }
562                                 
563                                 if (outputForm == "lt") {
564                                         out << name << '\t';
565                                         
566                                         //output distances
567                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
568                                         out << endl;
569                                 }else if (outputForm == "square") {
570                                         out << name << '\t';
571                                         
572                                         //output distances
573                                         for (int l = 0; l < globaldata->Groups.size(); l++) {   out  << dists[r][l] << '\t';  }
574                                         out << endl;
575                                 }else{
576                                         //output distances
577                                         for (int l = 0; l < r; l++) {   
578                                                 string otherName = globaldata->Groups[l];
579                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
580                                                         while (otherName.length() < 10) {  otherName += " ";  }
581                                                 }
582                                                 
583                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
584                                         }
585                                 }
586                         }
587                         out.close();
588                 }
589         }
590         catch(exception& e) {
591                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
592                 exit(1);
593         }
594 }
595 /***********************************************************/
596 int UnifracWeightedCommand::findIndex(float score, int index) {
597         try{
598                 for (int i = 0; i < rScores[index].size(); i++) {
599                         if (rScores[index][i] >= score) {       return i;       }
600                 }
601                 return rScores[index].size();
602         }
603         catch(exception& e) {
604                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
605                 exit(1);
606         }
607 }
608
609 /***********************************************************/
610
611 void UnifracWeightedCommand::calculateFreqsCumuls() {
612         try {
613                 //clear out old tree values
614                 rScoreFreq.clear();
615                 rScoreFreq.resize(numComp);
616                 rCumul.clear();
617                 rCumul.resize(numComp);
618                 validScores.clear();
619         
620                 //calculate frequency
621                 for (int f = 0; f < numComp; f++) {
622                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
623                                 validScores[rScores[f][i]] = rScores[f][i];
624                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
625                                 if (it != rScoreFreq[f].end()) {
626                                         rScoreFreq[f][rScores[f][i]]++;
627                                 }else{
628                                         rScoreFreq[f][rScores[f][i]] = 1;
629                                 }
630                         }
631                 }
632                 
633                 //calculate rcumul
634                 for(int a = 0; a < numComp; a++) {
635                         float rcumul = 1.0000;
636                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
637                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
638                                 //make rscoreFreq map and rCumul
639                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
640                                 rCumul[a][it->first] = rcumul;
641                                 //get percentage of random trees with that info
642                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
643                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
644                         }
645                 }
646
647         }
648         catch(exception& e) {
649                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
650                 exit(1);
651         }
652 }
653
654 /***********************************************************/
655
656
657
658
659