]> git.donarmstrong.com Git - mothur.git/blob - unifracweightedcommand.cpp
removed UWSig and WSig from output if user does not set random=T
[mothur.git] / unifracweightedcommand.cpp
1 /*
2  *  unifracweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracWeightedCommand::setParameters(){ 
14         try {
15                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
21                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
22                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
23                 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string UnifracWeightedCommand::getHelpString(){ 
38         try {
39                 string helpString = "";
40                 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
41                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
42                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
44                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
45                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
46                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
47                 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
48                 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
49                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
50                 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 UnifracWeightedCommand::UnifracWeightedCommand(){       
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["weighted"] = tempOutNames;
66                 outputTypes["wsummary"] = tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
72                 exit(1);
73         }
74 }
75
76 /***********************************************************/
77 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
78         try {
79                 abort = false; calledHelp = false;   
80                         
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters=parser.getParameters();
90                         map<string,string>::iterator it;
91                         
92                         ValidParameters validParameter;
93                 
94                         //check to make sure all parameters are valid for command
95                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["weighted"] = tempOutNames;
102                         outputTypes["wsummary"] = tempOutNames;
103                         outputTypes["phylip"] = tempOutNames;
104                         outputTypes["column"] = tempOutNames;
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("tree");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
117                                 }
118                                 
119                                 it = parameters.find("group");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                         }
135                         
136                         m->runParse = true;
137                         m->Groups.clear();
138                         m->namesOfGroups.clear();
139                         m->Treenames.clear();
140                         m->names.clear();
141                         
142                         //check for required parameters
143                         treefile = validParameter.validFile(parameters, "tree", true);
144                         if (treefile == "not open") { abort = true; }
145                         else if (treefile == "not found") {                             //if there is a current design file, use it
146                                 treefile = m->getTreeFile(); 
147                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
148                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
149                         }else { m->setTreeFile(treefile); }     
150                         
151                         //check for required parameters
152                         groupfile = validParameter.validFile(parameters, "group", true);
153                         if (groupfile == "not open") { abort = true; }
154                         else if (groupfile == "not found") { groupfile = ""; }
155                         else { m->setGroupFile(groupfile); }
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }
160                         else { m->setNameFile(namefile); }
161                         
162                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
163                         
164                                                                                                                                         
165                         //check for optional parameter and set defaults
166                         // ...at some point should added some additional type checking...
167                         groups = validParameter.validFile(parameters, "groups", false);                 
168                         if (groups == "not found") { groups = ""; }
169                         else { 
170                                 m->splitAtDash(groups, Groups);
171                                 m->Groups = Groups;
172                         }
173                                 
174                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
175                         convert(itersString, iters); 
176                         
177                         string temp = validParameter.validFile(parameters, "distance", false);                  
178                         if (temp == "not found") { phylip = false; outputForm = ""; }
179                         else{
180                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
181                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
182                         }
183                         
184                         temp = validParameter.validFile(parameters, "random", false);                           if (temp == "not found") { temp = "F"; }
185                         random = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
188                         includeRoot = m->isTrue(temp);
189                         
190                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
191                         m->setProcessors(temp);
192                         convert(temp, processors);
193                         
194                         if (!random) {  iters = 0;  } //turn off random calcs
195                 }
196                 
197                 
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
201                 exit(1);
202         }
203 }
204 /***********************************************************/
205 int UnifracWeightedCommand::execute() {
206         try {
207         
208                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
209                 
210                 m->setTreeFile(treefile);
211                 
212                 if (groupfile != "") {
213                         //read in group map info.
214                         tmap = new TreeMap(groupfile);
215                         tmap->readMap();
216                 }else{ //fake out by putting everyone in one group
217                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
218                         tmap = new TreeMap();
219                         
220                         for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
221                 }
222                 
223                 if (namefile != "") { readNamesFile(); }
224                 
225                 read = new ReadNewickTree(treefile);
226                 int readOk = read->read(tmap); 
227                 
228                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
229                 
230                 read->AssembleTrees();
231                 T = read->getTrees();
232                 delete read;
233                 
234                 //make sure all files match
235                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
236                 int numNamesInTree;
237                 if (namefile != "")  {  
238                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
239                         else {   numNamesInTree = m->Treenames.size();  }
240                 }else {  numNamesInTree = m->Treenames.size();  }
241                 
242                 
243                 //output any names that are in group file but not in tree
244                 if (numNamesInTree < tmap->getNumSeqs()) {
245                         for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
246                                 //is that name in the tree?
247                                 int count = 0;
248                                 for (int j = 0; j < m->Treenames.size(); j++) {
249                                         if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
250                                         count++;
251                                 }
252                                 
253                                 if (m->control_pressed) { 
254                                         delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
255                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
256                                         m->Groups.clear();
257                                         return 0;
258                                 }
259                                 
260                                 //then you did not find it so report it 
261                                 if (count == m->Treenames.size()) { 
262                                         //if it is in your namefile then don't remove
263                                         map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
264                                         
265                                         if (it == nameMap.end()) {
266                                                 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
267                                                 tmap->removeSeq(tmap->namesOfSeqs[i]);
268                                                 i--; //need this because removeSeq removes name from namesOfSeqs
269                                         }
270                                 }
271                         }
272                 }
273                 
274                 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
275                 m->openOutputFile(sumFile, outSum);
276                 outputNames.push_back(sumFile);  outputTypes["wsummary"].push_back(sumFile);
277                         
278                 util = new SharedUtil();
279                 string s; //to make work with setgroups
280                 util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted");      //sets the groups the user wants to analyze
281                 util->getCombos(groupComb, m->Groups, numComp);
282                 delete util;
283                 
284                 weighted = new Weighted(tmap, includeRoot);
285                         
286                 int start = time(NULL);
287                 
288                 //get weighted for users tree
289                 userData.resize(numComp,0);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
290                 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
291                 
292                 if (numComp < processors) { processors = numComp; }
293                                 
294                 //get weighted scores for users trees
295                 for (int i = 0; i < T.size(); i++) {
296                         
297                         if (m->control_pressed) { delete tmap; delete weighted;
298                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0; }
299
300                         counter = 0;
301                         rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
302                         uScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
303                         
304                         if (random) {  
305                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted", itersString);  
306                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
307                                 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted");
308                         } 
309
310                         userData = weighted->getValues(T[i], processors, outputDir);  //userData[0] = weightedscore
311                         
312                         if (m->control_pressed) { delete tmap; delete weighted;
313                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
314                         
315                         //save users score
316                         for (int s=0; s<numComp; s++) {
317                                 //add users score to vector of user scores
318                                 uScores[s].push_back(userData[s]);
319                                 
320                                 //save users tree score for summary file
321                                 utreeScores.push_back(userData[s]);
322                         }
323                         
324                         if (random) { 
325                         
326                                 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
327                                 vector< vector<string> > namesOfGroupCombos;
328                                 for (int a=0; a<numGroups; a++) { 
329                                         for (int l = 0; l < a; l++) {   
330                                                 vector<string> groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]);
331                                                 namesOfGroupCombos.push_back(groups);
332                                         }
333                                 }
334                                 
335                                 lines.clear();
336                                 
337                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
338                                         if(processors != 1){
339                                                 int numPairs = namesOfGroupCombos.size();
340                                                 int numPairsPerProcessor = numPairs / processors;
341                                         
342                                                 for (int i = 0; i < processors; i++) {
343                                                         int startPos = i * numPairsPerProcessor;
344                                                         if(i == processors - 1){
345                                                                 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
346                                                         }
347                                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
348                                                 }
349                                         }
350                                 #endif
351
352                                 
353                                 //get scores for random trees
354                                 for (int j = 0; j < iters; j++) {
355                                 
356                                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
357                                                 if(processors == 1){
358                                                         driver(T[i],  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
359                                                 }else{
360                                                         createProcesses(T[i],  namesOfGroupCombos, rScores);
361                                                 }
362                                         #else
363                                                 driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
364                                         #endif
365                                         
366                                         if (m->control_pressed) { delete tmap; delete weighted;
367                                                 for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
368                                         
369                                         //report progress
370 //                                      m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
371                                 }
372                                 lines.clear();
373                         
374                                 //find the signifigance of the score for summary file
375                                 for (int f = 0; f < numComp; f++) {
376                                         //sort random scores
377                                         sort(rScores[f].begin(), rScores[f].end());
378                                         
379                                         //the index of the score higher than yours is returned 
380                                         //so if you have 1000 random trees the index returned is 100 
381                                         //then there are 900 trees with a score greater then you. 
382                                         //giving you a signifigance of 0.900
383                                         int index = findIndex(userData[f], f);    if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
384                                         
385                                         //the signifigance is the number of trees with the users score or higher 
386                                         WScoreSig.push_back((iters-index)/(float)iters);
387                                 }
388                                 
389                                 //out << "Tree# " << i << endl;
390                                 calculateFreqsCumuls();
391                                 printWeightedFile();
392                                 
393                                 delete output;
394                         
395                         }
396                         
397                         //clear data
398                         rScores.clear();
399                         uScores.clear();
400                         validScores.clear();
401                 }
402                 
403                 
404                 if (m->control_pressed) { delete tmap; delete weighted;
405                         for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } return 0;  }
406                 
407                 printWSummaryFile();
408                 
409                 if (phylip) {   createPhylipFile();             }
410
411                 //clear out users groups
412                 m->Groups.clear();
413                 delete tmap; delete weighted;
414                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
415                 
416                 
417                 if (m->control_pressed) { 
418                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
419                         return 0; 
420                 }
421                 
422                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
423                 
424                 //set phylip file as new current phylipfile
425                 string current = "";
426                 itTypes = outputTypes.find("phylip");
427                 if (itTypes != outputTypes.end()) {
428                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
429                 }
430                 
431                 //set column file as new current columnfile
432                 itTypes = outputTypes.find("column");
433                 if (itTypes != outputTypes.end()) {
434                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
435                 }
436                 
437                 m->mothurOutEndLine();
438                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
440                 m->mothurOutEndLine();
441                 
442                 return 0;
443                 
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "UnifracWeightedCommand", "execute");
447                 exit(1);
448         }
449 }
450 /**************************************************************************************************/
451
452 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
453         try {
454 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
455                 int process = 1;
456                 vector<int> processIDS;
457                 
458                 EstOutput results;
459                 
460                 //loop through and create all the processes you want
461                 while (process != processors) {
462                         int pid = fork();
463                         
464                         if (pid > 0) {
465                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
466                                 process++;
467                         }else if (pid == 0){
468                                 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
469                         
470                                 //pass numSeqs to parent
471                                 ofstream out;
472                                 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
473                                 m->openOutputFile(tempFile, out);
474                                 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t';  } out << endl;
475                                 out.close();
476                                 
477                                 exit(0);
478                         }else { 
479                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
480                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
481                                 exit(0);
482                         }
483                 }
484                 
485                 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
486                 
487                 //force parent to wait until all the processes are done
488                 for (int i=0;i<(processors-1);i++) { 
489                         int temp = processIDS[i];
490                         wait(&temp);
491                 }
492                 
493                 //get data created by processes
494                 for (int i=0;i<(processors-1);i++) { 
495         
496                         ifstream in;
497                         string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
498                         m->openInputFile(s, in);
499                         
500                         double tempScore;
501                         for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
502                         in.close();
503                         m->mothurRemove(s);
504                 }
505                 
506                 return 0;
507 #endif          
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
511                 exit(1);
512         }
513 }
514
515 /**************************************************************************************************/
516 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
517  try {
518                 Tree* randT = new Tree(tmap);
519
520                 for (int h = start; h < (start+num); h++) {
521         
522                         if (m->control_pressed) { return 0; }
523                 
524                         //initialize weighted score
525                         string groupA = namesOfGroupCombos[h][0]; 
526                         string groupB = namesOfGroupCombos[h][1];
527                         
528                         //copy T[i]'s info.
529                         randT->getCopy(t);
530                          
531                         //create a random tree with same topology as T[i], but different labels
532                         randT->assembleRandomUnifracTree(groupA, groupB);
533                         
534                         if (m->control_pressed) { delete randT;  return 0;  }
535
536                         //get wscore of random tree
537                         EstOutput randomData = weighted->getValues(randT, groupA, groupB);
538                 
539                         if (m->control_pressed) { delete randT;  return 0;  }
540                                                                                 
541                         //save scores
542                         scores[h].push_back(randomData[0]);
543                 }
544         
545                 delete randT;
546         
547                 return 0;
548
549         }
550         catch(exception& e) {
551                 m->errorOut(e, "UnifracWeightedCommand", "driver");
552                 exit(1);
553         }
554 }
555 /***********************************************************/
556 void UnifracWeightedCommand::printWeightedFile() {
557         try {
558                 vector<double> data;
559                 vector<string> tags;
560                 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
561                 
562                 for(int a = 0; a < numComp; a++) {
563                         output->initFile(groupComb[a], tags);
564                         //print each line
565                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
566                                 data.push_back(it->first);  data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
567                                 output->output(data);
568                                 data.clear();
569                         } 
570                         output->resetFile();
571                 }
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
575                 exit(1);
576         }
577 }
578
579
580 /***********************************************************/
581 void UnifracWeightedCommand::printWSummaryFile() {
582         try {
583                 //column headers
584                 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
585                 m->mothurOut("Tree#\tGroups\tWScore\t");
586                 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
587                 outSum << endl; m->mothurOutEndLine();
588                 
589                 //format output
590                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
591                 
592                 //print each line
593                 int count = 0;
594                 for (int i = 0; i < T.size(); i++) { 
595                         for (int j = 0; j < numComp; j++) {
596                                 if (random) {
597                                         if (WScoreSig[count] > (1/(float)iters)) {
598                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
599                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl; 
600                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" +  toString(WScoreSig[count]) + "\n");   
601                                         }else{
602                                                 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
603                                                 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
604                                                 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" +  toString((1/float(iters))) + "\n");  
605                                         }
606                                 }else{
607                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl; 
608                                         cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count]  << endl; 
609                                         m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n"); 
610                                 }
611                                 count++;
612                         }
613                 }
614                 outSum.close();
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
618                 exit(1);
619         }
620 }
621 /***********************************************************/
622 void UnifracWeightedCommand::createPhylipFile() {
623         try {
624                 int count = 0;
625                 //for each tree
626                 for (int i = 0; i < T.size(); i++) { 
627                 
628                         string phylipFileName;
629                         if ((outputForm == "lt") || (outputForm == "square")) {
630                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.phylip.dist";
631                                 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
632                         }else { //column
633                                 phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".weighted.column.dist";
634                                 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
635                         }
636                         
637                         ofstream out;
638                         m->openOutputFile(phylipFileName, out);
639                         
640                         if ((outputForm == "lt") || (outputForm == "square")) {
641                                 //output numSeqs
642                                 out << m->Groups.size() << endl;
643                         }
644
645                         //make matrix with scores in it
646                         vector< vector<float> > dists;  dists.resize(m->Groups.size());
647                         for (int i = 0; i < m->Groups.size(); i++) {
648                                 dists[i].resize(m->Groups.size(), 0.0);
649                         }
650                         
651                         //flip it so you can print it
652                         for (int r=0; r<m->Groups.size(); r++) { 
653                                 for (int l = 0; l < r; l++) {
654                                         dists[r][l] = utreeScores[count];
655                                         dists[l][r] = utreeScores[count];
656                                         count++;
657                                 }
658                         }
659
660                         //output to file
661                         for (int r=0; r<m->Groups.size(); r++) { 
662                                 //output name
663                                 string name = m->Groups[r];
664                                 if (name.length() < 10) { //pad with spaces to make compatible
665                                         while (name.length() < 10) {  name += " ";  }
666                                 }
667                                 
668                                 if (outputForm == "lt") {
669                                         out << name << '\t';
670                                         
671                                         //output distances
672                                         for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
673                                         out << endl;
674                                 }else if (outputForm == "square") {
675                                         out << name << '\t';
676                                         
677                                         //output distances
678                                         for (int l = 0; l < m->Groups.size(); l++) {    out  << dists[r][l] << '\t';  }
679                                         out << endl;
680                                 }else{
681                                         //output distances
682                                         for (int l = 0; l < r; l++) {   
683                                                 string otherName = m->Groups[l];
684                                                 if (otherName.length() < 10) { //pad with spaces to make compatible
685                                                         while (otherName.length() < 10) {  otherName += " ";  }
686                                                 }
687                                                 
688                                                 out  << name << '\t' << otherName << dists[r][l] << endl;  
689                                         }
690                                 }
691                         }
692                         out.close();
693                 }
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
697                 exit(1);
698         }
699 }
700 /***********************************************************/
701 int UnifracWeightedCommand::findIndex(float score, int index) {
702         try{
703                 for (int i = 0; i < rScores[index].size(); i++) {
704                         if (rScores[index][i] >= score) {       return i;       }
705                 }
706                 return rScores[index].size();
707         }
708         catch(exception& e) {
709                 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
710                 exit(1);
711         }
712 }
713
714 /***********************************************************/
715
716 void UnifracWeightedCommand::calculateFreqsCumuls() {
717         try {
718                 //clear out old tree values
719                 rScoreFreq.clear();
720                 rScoreFreq.resize(numComp);
721                 rCumul.clear();
722                 rCumul.resize(numComp);
723                 validScores.clear();
724         
725                 //calculate frequency
726                 for (int f = 0; f < numComp; f++) {
727                         for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7...  you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
728                                 validScores[rScores[f][i]] = rScores[f][i];
729                                 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
730                                 if (it != rScoreFreq[f].end()) {
731                                         rScoreFreq[f][rScores[f][i]]++;
732                                 }else{
733                                         rScoreFreq[f][rScores[f][i]] = 1;
734                                 }
735                         }
736                 }
737                 
738                 //calculate rcumul
739                 for(int a = 0; a < numComp; a++) {
740                         float rcumul = 1.0000;
741                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
742                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
743                                 //make rscoreFreq map and rCumul
744                                 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
745                                 rCumul[a][it->first] = rcumul;
746                                 //get percentage of random trees with that info
747                                 if (it2 != rScoreFreq[a].end()) {  rScoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
748                                 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
749                         }
750                 }
751
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
755                 exit(1);
756         }
757 }
758 /*****************************************************************/
759 int UnifracWeightedCommand::readNamesFile() {
760         try {
761                 m->names.clear();
762                 numUniquesInName = 0;
763                 
764                 ifstream in;
765                 m->openInputFile(namefile, in);
766                 
767                 string first, second;
768                 map<string, string>::iterator itNames;
769                 
770                 while(!in.eof()) {
771                         in >> first >> second; m->gobble(in);
772                         
773                         numUniquesInName++;
774                         
775                         itNames = m->names.find(first);
776                         if (itNames == m->names.end()) {  
777                                 m->names[first] = second; 
778                                 
779                                 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
780                                 vector<string> dupNames;
781                                 m->splitAtComma(second, dupNames);
782                                 
783                                 for (int i = 0; i < dupNames.size(); i++) {     
784                                         nameMap[dupNames[i]] = dupNames[i]; 
785                                         if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
786                                 }
787                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }                  
788                 }
789                 in.close();
790                 
791                 return 0;
792         }
793         catch(exception& e) {
794                 m->errorOut(e, "UnifracWeightedCommand", "readNamesFile");
795                 exit(1);
796         }
797 }
798 /***********************************************************/
799
800
801
802
803