]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
f868129fd2236a3aca06cf27f2e25b2229013ccf
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         vector<string> myArray = setParameters();
86                         
87                         OptionParser parser(option);
88                         map<string,string> parameters=parser.getParameters();
89                         map<string,string>::iterator it;
90                         
91                         ValidParameters validParameter;
92                 
93                         //check to make sure all parameters are valid for command
94                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["weighted"] = tempOutNames;
101                         outputTypes["wsummary"] = tempOutNames;
102                         outputTypes["phylip"] = tempOutNames;
103                         outputTypes["column"] = tempOutNames;
104                         
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else {
109                                 string path;
110                                 it = parameters.find("tree");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
116                                 }
117                                 
118                                 it = parameters.find("group");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
124                                 }
125                                 
126                                 it = parameters.find("name");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
132                                 }
133                         }
134                         
135                         m->runParse = true;
136                         m->Groups.clear();
137                         m->namesOfGroups.clear();
138                         m->Treenames.clear();
139                         m->names.clear();
140                         
141                         //check for required parameters
142                         treefile = validParameter.validFile(parameters, "tree", true);
143                         if (treefile == "not open") { abort = true; }
144                         else if (treefile == "not found") {                             //if there is a current design file, use it
145                                 treefile = m->getTreeFile(); 
146                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
148                         }       
149                         
150                         //check for required parameters
151                         groupfile = validParameter.validFile(parameters, "group", true);
152                         if (groupfile == "not open") { abort = true; }
153                         else if (groupfile == "not found") { groupfile = ""; }
154                         
155                         namefile = validParameter.validFile(parameters, "name", true);
156                         if (namefile == "not open") { abort = true; }
157                         else if (namefile == "not found") { namefile = ""; }
158                         
159                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
160                         
161                                                                                                                                         
162                         //check for optional parameter and set defaults
163                         // ...at some point should added some additional type checking...
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; }
166                         else { 
167                                 m->splitAtDash(groups, Groups);
168                                 m->Groups = Groups;
169                         }
170                                 
171                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
172                         convert(itersString, iters); 
173                         
174                         string temp = validParameter.validFile(parameters, "distance", false);                  
175                         if (temp == "not found") { phylip = false; outputForm = ""; }
176                         else{
177                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
178                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
179                         }
180                         
181                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
182                         random = m->isTrue(temp);
183                         
184                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
185                         includeRoot = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
188                         m->setProcessors(temp);
189                         convert(temp, processors);
190                         
191                         if (!random) {  iters = 0;  } //turn off random calcs
192                 }
193                 
194                 
195         }
196         catch(exception& e) {
197                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
198                 exit(1);
199         }
200 }
201 /***********************************************************/
202 int UnifracWeightedCommand::execute() {
203         try {
204         
205                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
206                 
207                 m->setTreeFile(treefile);
208                 
209                 if (groupfile != "") {
210                         //read in group map info.
211                         tmap = new TreeMap(groupfile);
212                         tmap->readMap();
213                 }else{ //fake out by putting everyone in one group
214                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
215                         tmap = new TreeMap();
216                         
217                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
218                 }
219                 
220                 if (namefile != "") { readNamesFile(); }
221                 
222                 read = new ReadNewickTree(treefile);
223                 int readOk = read->read(tmap); 
224                 
225                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
226                 
227                 read->AssembleTrees();
228                 T = read->getTrees();
229                 delete read;
230                 
231                 //make sure all files match
232                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
233                 int numNamesInTree;
234                 if (namefile != "")  {  
235                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
236                         else {   numNamesInTree = m->Treenames.size();  }
237                 }else {  numNamesInTree = m->Treenames.size();  }
238                 
239                 
240                 //output any names that are in group file but not in tree
241                 if (numNamesInTree < tmap->getNumSeqs()) {
242                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
243                                 //is that name in the tree?
244                                 int count = 0;
245                                 for (int j = 0; j < m->Treenames.size(); j++) {
246                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
247                                         count++;
248                                 }
249                                 
250                                 if (m->control_pressed) { 
251                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
252                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } outputTypes.clear();
253                                         m->Groups.clear();
254                                         return 0;
255                                 }
256                                 
257                                 //then you did not find it so report it 
258                                 if (count == m->Treenames.size()) { 
259                                         //if it is in your namefile then don't remove
260                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
261                                         
262                                         if (it == nameMap.end()) {
263                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
264                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
265                                                 i--; //need this because removeSeq removes name from namesOfSeqs
266                                         }
267                                 }
268                         }
269                 }
270                 
271                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
272                 m->openOutputFile(sumFile, outSum);
273                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
274                         
275                 util = new SharedUtil();
276                 string s; //to make work with setgroups
277                 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted");      //sets the groups the user wants to analyze
278                 util->getCombos(groupComb, m->Groups, numComp);
279                 delete util;
280                 
281                 weighted = new Weighted(tmap, includeRoot);
282                         
283                 int start = time(NULL);
284                 
285                 //get weighted for users tree
286                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
287                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
288                 
289                 if (numComp < processors) { processors = numComp; }
290                                 
291                 //get weighted scores for users trees
292                 for (int i = 0; i < T.size(); i++) {
293                         
294                         if (m->control_pressed) { delete tmap; delete weighted;
295                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0; }
296
297                         counter = 0;
298                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
299                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
300                         
301                         if (random) {  
302                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
303                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
304                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
305                         } 
306
307                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
308                         
309                         if (m->control_pressed) { delete tmap; delete weighted;
310                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
311                         
312                         //save users score
313                         for (int s=0; s<numComp; s++) {
314                                 //add users score to vector of user scores
315                                 uScores[s].push_back(userData[s]);
316                                 
317                                 //save users tree score for summary file
318                                 utreeScores.push_back(userData[s]);
319                         }
320                         
321                         if (random) { 
322                         
323                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
324                                 vector< vector<string> > namesOfGroupCombos;
325                                 for (int a=0; a<numGroups; a++) { 
326                                         for (int l = 0; l < a; l++) {   
327                                                 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
328                                                 namesOfGroupCombos.push_back(groups);
329                                         }
330                                 }
331                                 
332                                 lines.clear();
333                                 
334                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
335                                         if(processors != 1){
336                                                 int numPairs = namesOfGroupCombos.size();
337                                                 int numPairsPerProcessor = numPairs / processors;
338                                         
339                                                 for (int i = 0; i < processors; i++) {
340                                                         int startPos = i * numPairsPerProcessor;
341                                                         if(i == processors - 1){
342                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
343                                                         }
344                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
345                                                 }
346                                         }
347                                 #endif
348
349                                 
350                                 //get scores for random trees
351                                 for (int j = 0; j < iters; j++) {
352                                 
353                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
354                                                 if(processors == 1){
355                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
356                                                 }else{
357                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
358                                                 }
359                                         #else
360                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
361                                         #endif
362                                         
363                                         if (m->control_pressed) { delete tmap; delete weighted;
364                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  } return 0; }
365                                         
366                                         //report progress
367 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
368                                 }
369                                 lines.clear();
370                         
371                                 //find the signifigance of the score for summary file
372                                 for (int f = 0; f < numComp; f++) {
373                                         //sort random scores
374                                         sort(rScores[f].begin(), rScores[f].end());
375                                         
376                                         //the index of the score higher than yours is returned 
377                                         //so if you have 1000 random trees the index returned is 100 
378                                         //then there are 900 trees with a score greater then you. 
379                                         //giving you a signifigance of 0.900
380                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
381                                         
382                                         //the signifigance is the number of trees with the users score or higher 
383                                         WScoreSig.push_back((iters-index)/(float)iters);
384                                 }
385                                 
386                                 //out << "Tree# " << i << endl;
387                                 calculateFreqsCumuls();
388                                 printWeightedFile();
389                                 
390                                 delete output;
391                         
392                         }
393                         
394                         //clear data
395                         rScores.clear();
396                         uScores.clear();
397                         validScores.clear();
398                 }
399                 
400                 
401                 if (m->control_pressed) { delete tmap; delete weighted;
402                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str());  } return 0;  }
403                 
404                 printWSummaryFile();
405                 
406                 if (phylip) {   createPhylipFile();             }
407
408                 //clear out users groups
409                 m->Groups.clear();
410                 delete tmap; delete weighted;
411                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
412                 
413                 
414                 if (m->control_pressed) { 
415                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
416                         return 0; 
417                 }
418                 
419                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
420                 
421                 //set phylip file as new current phylipfile
422                 string current = "";
423                 itTypes = outputTypes.find("phylip");
424                 if (itTypes != outputTypes.end()) {
425                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
426                 }
427                 
428                 //set column file as new current columnfile
429                 itTypes = outputTypes.find("column");
430                 if (itTypes != outputTypes.end()) {
431                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
432                 }
433                 
434                 m->mothurOutEndLine();
435                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
436                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
437                 m->mothurOutEndLine();
438                 
439                 return 0;
440                 
441         }
442         catch(exception& e) {
443                 m->errorOut(e, "UnifracWeightedCommand", "execute");
444                 exit(1);
445         }
446 }
447 /**************************************************************************************************/
448
449 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
450         try {
451 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
452                 int process = 1;
453                 vector<int> processIDS;
454                 
455                 EstOutput results;
456                 
457                 //loop through and create all the processes you want
458                 while (process != processors) {
459                         int pid = fork();
460                         
461                         if (pid > 0) {
462                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
463                                 process++;
464                         }else if (pid == 0){
465                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
466                         
467                                 //pass numSeqs to parent
468                                 ofstream out;
469                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
470                                 m->openOutputFile(tempFile, out);
471                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
472                                 out.close();
473                                 
474                                 exit(0);
475                         }else { 
476                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
477                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
478                                 exit(0);
479                         }
480                 }
481                 
482                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
483                 
484                 //force parent to wait until all the processes are done
485                 for (int i=0;i<(processors-1);i++) { 
486                         int temp = processIDS[i];
487                         wait(&temp);
488                 }
489                 
490                 //get data created by processes
491                 for (int i=0;i<(processors-1);i++) { 
492         
493                         ifstream in;
494                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
495                         m->openInputFile(s, in);
496                         
497                         double tempScore;
498                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
499                         in.close();
500                         remove(s.c_str());
501                 }
502                 
503                 return 0;
504 #endif          
505         }
506         catch(exception& e) {
507                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
508                 exit(1);
509         }
510 }
511
512 /**************************************************************************************************/
513 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
514  try {
515                 Tree* randT = new Tree(tmap);
516
517                 for (int h = start; h < (start+num); h++) {
518         
519                         if (m->control_pressed) { return 0; }
520                 
521                         //initialize weighted score
522                         string groupA = namesOfGroupCombos[h][0]; 
523                         string groupB = namesOfGroupCombos[h][1];
524                         
525                         //copy T[i]'s info.
526                         randT->getCopy(t);
527                          
528                         //create a random tree with same topology as T[i], but different labels
529                         randT->assembleRandomUnifracTree(groupA, groupB);
530                         
531                         if (m->control_pressed) { delete randT;  return 0;  }
532
533                         //get wscore of random tree
534                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
535                 
536                         if (m->control_pressed) { delete randT;  return 0;  }
537                                                                                 
538                         //save scores
539                         scores[h].push_back(randomData[0]);
540                 }
541         
542                 delete randT;
543         
544                 return 0;
545
546         }
547         catch(exception& e) {
548                 m->errorOut(e, "UnifracWeightedCommand", "driver");
549                 exit(1);
550         }
551 }
552 /***********************************************************/
553 void UnifracWeightedCommand::printWeightedFile() {
554         try {
555                 vector<double> data;
556                 vector<string> tags;
557                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
558                 
559                 for(int a = 0; a < numComp; a++) {
560                         output->initFile(groupComb[a], tags);
561                         //print each line
562                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
563                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
564                                 output->output(data);
565                                 data.clear();
566                         } 
567                         output->resetFile();
568                 }
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
572                 exit(1);
573         }
574 }
575
576
577 /***********************************************************/
578 void UnifracWeightedCommand::printWSummaryFile() {
579         try {
580                 //column headers
581                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t' << "WSig" <<  endl;
582                 m->mothurOut("Tree#\tGroups\tWScore\tWSig"); m->mothurOutEndLine(); 
583                 
584                 //format output
585                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
586                 
587                 //print each line
588                 int count = 0;
589                 for (int i = 0; i < T.size(); i++) { 
590                         for (int j = 0; j < numComp; j++) {
591                                 if (random) {
592                                         if (WScoreSig[count] > (1/(float)iters)) {
593                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
594                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
595                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
596                                         }else{
597                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
598                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
599                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
600                                         }
601                                 }else{
602                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
603                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << "0.00" << endl; 
604                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t0.00\n"); 
605                                 }
606                                 count++;
607                         }
608                 }
609                 outSum.close();
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
613                 exit(1);
614         }
615 }
616 /***********************************************************/
617 void UnifracWeightedCommand::createPhylipFile() {
618         try {
619                 int count = 0;
620                 //for each tree
621                 for (int i = 0; i < T.size(); i++) { 
622                 
623                         string phylipFileName;
624                         if ((outputForm == "lt") || (outputForm == "square")) {
625                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
626                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
627                         }else { //column
628                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
629                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
630                         }
631                         
632                         ofstream out;
633                         m->openOutputFile(phylipFileName, out);
634                         
635                         if ((outputForm == "lt") || (outputForm == "square")) {
636                                 //output numSeqs
637                                 out << m->Groups.size() << endl;
638                         }
639
640                         //make matrix with scores in it
641                         vector< vector<float> > dists;  dists.resize(m->Groups.size());
642                         for (int i = 0; i < m->Groups.size(); i++) {
643                                 dists[i].resize(m->Groups.size(), 0.0);
644                         }
645                         
646                         //flip it so you can print it
647                         for (int r=0; r<m->Groups.size(); r++) { 
648                                 for (int l = 0; l < r; l++) {
649                                         dists[r][l] = utreeScores[count];
650                                         dists[l][r] = utreeScores[count];
651                                         count++;
652                                 }
653                         }
654
655                         //output to file
656                         for (int r=0; r<m->Groups.size(); r++) { 
657                                 //output name
658                                 string name = m->Groups[r];
659                                 if (name.length() < 10) { //pad with spaces to make compatible
660                                         while (name.length() < 10) {  name += " ";  }
661                                 }
662                                 
663                                 if (outputForm == "lt") {
664                                         out << name << '\t';
665                                         
666                                         //output distances
667                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
668                                         out << endl;
669                                 }else if (outputForm == "square") {
670                                         out << name << '\t';
671                                         
672                                         //output distances
673                                         for (int l = 0; l < m->Groups.size(); l++) {    out  << dists[r][l] << '\t';  }
674                                         out << endl;
675                                 }else{
676                                         //output distances
677                                         for (int l = 0; l < r; l++) {   
678                                                 string otherName = m->Groups[l];
679                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
680                                                         while (otherName.length() < 10) {  otherName += " ";  }
681                                                 }
682                                                 
683                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
684                                         }
685                                 }
686                         }
687                         out.close();
688                 }
689         }
690         catch(exception& e) {
691                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
692                 exit(1);
693         }
694 }
695 /***********************************************************/
696 int UnifracWeightedCommand::findIndex(float score, int index) {
697         try{
698                 for (int i = 0; i < rScores[index].size(); i++) {
699                         if (rScores[index][i] >= score) {       return i;       }
700                 }
701                 return rScores[index].size();
702         }
703         catch(exception& e) {
704                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
705                 exit(1);
706         }
707 }
708
709 /***********************************************************/
710
711 void UnifracWeightedCommand::calculateFreqsCumuls() {
712         try {
713                 //clear out old tree values
714                 rScoreFreq.clear();
715                 rScoreFreq.resize(numComp);
716                 rCumul.clear();
717                 rCumul.resize(numComp);
718                 validScores.clear();
719         
720                 //calculate frequency
721                 for (int f = 0; f < numComp; f++) {
722                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
723                                 validScores[rScores[f][i]] = rScores[f][i];
724                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
725                                 if (it != rScoreFreq[f].end()) {
726                                         rScoreFreq[f][rScores[f][i]]++;
727                                 }else{
728                                         rScoreFreq[f][rScores[f][i]] = 1;
729                                 }
730                         }
731                 }
732                 
733                 //calculate rcumul
734                 for(int a = 0; a < numComp; a++) {
735                         float rcumul = 1.0000;
736                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
737                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
738                                 //make rscoreFreq map and rCumul
739                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
740                                 rCumul[a][it->first] = rcumul;
741                                 //get percentage of random trees with that info
742                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
743                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
744                         }
745                 }
746
747         }
748         catch(exception& e) {
749                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
750                 exit(1);
751         }
752 }
753 /*****************************************************************/
754 int UnifracWeightedCommand::readNamesFile() {
755         try {
756                 m->names.clear();
757                 numUniquesInName = 0;
758                 
759                 ifstream in;
760                 m->openInputFile(namefile, in);
761                 
762                 string first, second;
763                 map<string, string>::iterator itNames;
764                 
765                 while(!in.eof()) {
766                         in >> first >> second; m->gobble(in);
767                         
768                         numUniquesInName++;
769                         
770                         itNames = m->names.find(first);
771                         if (itNames == m->names.end()) {  
772                                 m->names[first] = second; 
773                                 
774                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
775                                 vector<string> dupNames;
776                                 m->splitAtComma(second, dupNames);
777                                 
778                                 for (int i = 0; i < dupNames.size(); i++) {     
779                                         nameMap[dupNames[i]] = dupNames[i]; 
780                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
781                                 }
782                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
783                 }
784                 in.close();
785                 
786                 return 0;
787         }
788         catch(exception& e) {
789                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
790                 exit(1);
791         }
792 }
793 /***********************************************************/
794
795
796
797
798