]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
db5cf90cf56ad18ad1f5fd80cb6ee5b79c1aaa32
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["unweighted"] = tempOutNames;
73                 outputTypes["uwsummary"] = tempOutNames;
74                 outputTypes["phylip"] = tempOutNames;
75                 outputTypes["column"] = tempOutNames;
76         outputTypes["tree"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
80                 exit(1);
81         }
82 }
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;   
87                 
88                         
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         map<string,string>::iterator it;
99                         
100                         ValidParameters validParameter;
101                 
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["unweighted"] = tempOutNames;
110                         outputTypes["uwsummary"] = tempOutNames;
111                         outputTypes["phylip"] = tempOutNames;
112                         outputTypes["column"] = tempOutNames;
113             outputTypes["tree"] = tempOutNames;
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("tree");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
126                                 }
127                                 
128                                 it = parameters.find("group");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("name");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145             //check for required parameters
146                         treefile = validParameter.validFile(parameters, "tree", true);
147                         if (treefile == "not open") { abort = true; }
148                         else if (treefile == "not found") {                             //if there is a current design file, use it
149                                 treefile = m->getTreeFile(); 
150                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
152                         }else { m->setTreeFile(treefile); }     
153                         
154                         //check for required parameters
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else { m->setGroupFile(groupfile); }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         else { m->setNameFile(namefile); }
164                         
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
166                         
167                         //check for optional parameter and set defaults
168                         // ...at some point should added some additional type checking...
169                         groups = validParameter.validFile(parameters, "groups", false);                 
170                         if (groups == "not found") { groups = ""; }
171                         else { 
172                                 m->splitAtDash(groups, Groups);
173                                 m->setGroups(Groups);
174                         }
175                                 
176                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
177                         m->mothurConvert(itersString, iters); 
178                         
179                         string temp = validParameter.validFile(parameters, "distance", false);                  
180                         if (temp == "not found") { phylip = false; outputForm = ""; }
181                         else{
182                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
183                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184                         }
185                         
186                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
187                         random = m->isTrue(temp);
188                         
189                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
190                         includeRoot = m->isTrue(temp);
191                         
192                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
193                         m->setProcessors(temp);
194                         m->mothurConvert(temp, processors); 
195                         
196             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
197                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198             else {  
199                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
200                 else { subsample = false; }
201             }
202                         
203             if (!subsample) { subsampleIters = 0;   }
204             else { subsampleIters = iters;          }
205             
206             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
207                         consensus = m->isTrue(temp);
208             
209                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
210                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
211             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
213
214                         if (!random) {  iters = 0;  } //turn off random calcs
215                         
216                         //if user selects distance = true and no groups it won't calc the pairwise
217                         if ((phylip) && (Groups.size() == 0)) {
218                                 groups = "all";
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                         
223                         if (namefile == "") {
224                                 vector<string> files; files.push_back(treefile);
225                                 parser.getNameFile(files);
226                         }
227                 }
228                 
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
232                 exit(1);
233         }
234 }
235
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
238         try {
239                 
240                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
241                 
242                 m->setTreeFile(treefile);
243                 
244                 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245         T = reader->getTrees();
246         tmap = T[0]->getTreeMap();
247         map<string, string> nameMap = reader->getNames();
248         map<string, string> unique2Dup = reader->getNameMap();
249         delete reader;  
250         
251                 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
252                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
253                 m->openOutputFile(sumFile, outSum);
254                 
255                 SharedUtil util;
256                 Groups = m->getGroups();
257                 vector<string> namesGroups = tmap->getNamesOfGroups();
258                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
259                 
260                 Unweighted unweighted(includeRoot);
261                 
262                 int start = time(NULL);
263         
264         //set or check size
265         if (subsample) {
266             //user has not set size, set size = smallest samples size
267             if (subsampleSize == -1) { 
268                 vector<string> temp; temp.push_back(Groups[0]);
269                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
270                 for (int i = 1; i < Groups.size(); i++) {
271                     temp.clear(); temp.push_back(Groups[i]);
272                     int thisSize = (tmap->getNamesSeqs(temp)).size();
273                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
274                 }
275                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
276             }else { //eliminate any too small groups
277                 vector<string> newGroups = Groups;
278                 Groups.clear();
279                 for (int i = 0; i < newGroups.size(); i++) {
280                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
281                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
282                     int thisSize = thisGroupsSeqs.size();
283                     
284                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
285                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
286                 } 
287                 m->setGroups(Groups);
288             }
289         }
290                 
291         util.getCombos(groupComb, Groups, numComp);
292                 m->setGroups(Groups);
293         
294                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
295         
296                 if (numComp < processors) { processors = numComp;  }
297         
298         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
299                 
300                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
301                 m->mothurOut("Tree#\tGroups\tUWScore\t");
302                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
303                 outSum << endl; m->mothurOutEndLine();
304          
305                 //get pscores for users trees
306                 for (int i = 0; i < T.size(); i++) {
307                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
308                         
309             counter = 0;
310                         
311                         if (random)  {  
312                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted", itersString);
313                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
314                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
315                         }
316                         
317                         
318                         //get unweighted for users tree
319                         rscoreFreq.resize(numComp);  
320                         rCumul.resize(numComp);  
321                         utreeScores.resize(numComp);  
322                         UWScoreSig.resize(numComp); 
323             
324             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
325
326                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
327                 
328                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
329                         
330                         //output scores for each combination
331                         for(int k = 0; k < numComp; k++) {
332                                 //saves users score
333                                 utreeScores[k].push_back(userData[k]);
334                                 
335                                 //add users score to validscores
336                                 validScores[userData[k]] = userData[k];
337                 
338                 if (!random) { UWScoreSig[k].push_back(0.0);    }
339                         }
340             
341             if (random) {  runRandomCalcs(T[i], userData);  }
342                         
343                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
344             
345             //subsample loop
346             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
347             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
348                 
349                 if (m->control_pressed) { break; }
350                 
351                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
352                 TreeMap* newTmap = new TreeMap();
353                 newTmap->getCopy(*tmap);
354                 
355                 SubSample sample;
356                 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
357                 
358                 //uses method of setting groups to doNotIncludeMe
359                 //SubSample sample;
360                 //Tree* newTree2 = sample.getSample(T[i], newTmap, nameMap, subsampleSize, unique2Dup);
361                 // newTree2->print(cout);
362                 
363                 //call new weighted function
364                 vector<double> iterData; iterData.resize(numComp,0);
365                 Unweighted thisUnweighted(includeRoot);
366                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
367                 
368                 //save data to make ave dist, std dist
369                 calcDistsTotals.push_back(iterData);
370                 
371                 delete newTmap;
372                 delete subSampleTree;
373                 
374                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
375             }
376             
377             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0;  }
378
379             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
380             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
381             
382             //print output files
383                         printUWSummaryFile(i);
384                         if (random)  {  printUnweightedFile();  delete output;  }
385                         if (phylip) {   createPhylipFile(i);            }
386                         
387                         rscoreFreq.clear(); 
388                         rCumul.clear();  
389                         validScores.clear(); 
390                         utreeScores.clear();  
391                         UWScoreSig.clear(); 
392                 }
393                 
394
395                 outSum.close();
396                 delete tmap; 
397                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
398                 
399                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
400                 
401                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
402                 
403                 //set phylip file as new current phylipfile
404                 string current = "";
405                 itTypes = outputTypes.find("phylip");
406                 if (itTypes != outputTypes.end()) {
407                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
408                 }
409                 
410                 //set column file as new current columnfile
411                 itTypes = outputTypes.find("column");
412                 if (itTypes != outputTypes.end()) {
413                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
414                 }
415                 
416                 m->mothurOutEndLine();
417                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
418                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
419                 m->mothurOutEndLine();
420                 
421                 return 0;
422                 
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
426                 exit(1);
427         }
428 }
429 /**************************************************************************************************/
430 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
431         try {
432         //we need to find the average distance and standard deviation for each groups distance
433         
434         //finds sum
435         vector<double> averages; averages.resize(numComp, 0); 
436         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
437             for (int i = 0; i < dists[thisIter].size(); i++) {  
438                 averages[i] += dists[thisIter][i];
439             }
440         }
441         
442         //finds average.
443         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
444         
445         //find standard deviation
446         vector<double> stdDev; stdDev.resize(numComp, 0);
447         
448         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
449             for (int j = 0; j < dists[thisIter].size(); j++) {
450                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
451             }
452         }
453         for (int i = 0; i < stdDev.size(); i++) {  
454             stdDev[i] /= (float) subsampleIters; 
455             stdDev[i] = sqrt(stdDev[i]);
456         }
457         
458         //make matrix with scores in it
459         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
460         for (int i = 0; i < m->getNumGroups(); i++) {
461             avedists[i].resize(m->getNumGroups(), 0.0);
462         }
463         
464         //make matrix with scores in it
465         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
466         for (int i = 0; i < m->getNumGroups(); i++) {
467             stddists[i].resize(m->getNumGroups(), 0.0);
468         }
469         
470         //flip it so you can print it
471         int count = 0;
472         for (int r=0; r<m->getNumGroups(); r++) { 
473             for (int l = 0; l < r; l++) {
474                 avedists[r][l] = averages[count];
475                 avedists[l][r] = averages[count];
476                 stddists[r][l] = stdDev[count];
477                 stddists[l][r] = stdDev[count];
478                 count++;
479             }
480         }
481         
482         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave.dist";
483         outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
484         
485         ofstream out;
486         m->openOutputFile(aveFileName, out);
487         
488         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std.dist";
489         outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
490         
491         ofstream outStd;
492         m->openOutputFile(stdFileName, outStd);
493         
494         if ((outputForm == "lt") || (outputForm == "square")) {
495             //output numSeqs
496             out << m->getNumGroups() << endl;
497             outStd << m->getNumGroups() << endl;
498         }
499         
500         //output to file
501         for (int r=0; r<m->getNumGroups(); r++) { 
502             //output name
503             string name = (m->getGroups())[r];
504             if (name.length() < 10) { //pad with spaces to make compatible
505                 while (name.length() < 10) {  name += " ";  }
506             }
507             
508             if (outputForm == "lt") {
509                 out << name << '\t';
510                 outStd << name << '\t';
511                 
512                 //output distances
513                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
514                 out << endl;  outStd << endl;
515             }else if (outputForm == "square") {
516                 out << name << '\t';
517                 outStd << name << '\t';
518                 
519                 //output distances
520                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
521                 out << endl; outStd << endl;
522             }else{
523                 //output distances
524                 for (int l = 0; l < r; l++) {   
525                     string otherName = (m->getGroups())[l];
526                     if (otherName.length() < 10) { //pad with spaces to make compatible
527                         while (otherName.length() < 10) {  otherName += " ";  }
528                     }
529                     
530                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
531                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
532                 }
533             }
534         }
535         out.close();
536         outStd.close();
537         
538         return 0;
539     }
540         catch(exception& e) {
541                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
542                 exit(1);
543         }
544 }
545
546 /**************************************************************************************************/
547 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
548         try {
549         
550         //used in tree constructor 
551         m->runParse = false;
552         
553         //create treemap class from groupmap for tree class to use
554         TreeMap newTmap;
555         newTmap.makeSim(m->getGroups());
556         
557         //clear  old tree names if any
558         m->Treenames.clear();
559         
560         //fills globaldatas tree names
561         m->Treenames = m->getGroups();
562         
563         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
564         
565         if (m->control_pressed) { return 0; }
566         
567         Consensus con;
568         Tree* conTree = con.getTree(newTrees);
569         
570         //create a new filename
571         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";                         
572         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
573         ofstream outTree;
574         m->openOutputFile(conFile, outTree);
575         
576         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
577         outTree.close();
578         
579         return 0;
580         
581     }
582         catch(exception& e) {
583                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
584                 exit(1);
585         }
586 }
587 /**************************************************************************************************/
588
589 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
590         try {
591         
592         vector<Tree*> trees;
593         
594         //create a new filename
595         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";                               
596         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
597         
598         ofstream outAll;
599         m->openOutputFile(outputFile, outAll);
600         
601         
602         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
603             
604             if (m->control_pressed) { break; }
605             
606             //make matrix with scores in it
607             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
608             for (int j = 0; j < m->getNumGroups(); j++) {
609                 sims[j].resize(m->getNumGroups(), 0.0);
610             }
611             
612             int count = 0;
613                         for (int r=0; r<m->getNumGroups(); r++) { 
614                                 for (int l = 0; l < r; l++) {
615                     double sim = -(dists[i][count]-1.0);
616                                         sims[r][l] = sim;
617                                         sims[l][r] = sim;
618                                         count++;
619                                 }
620                         }
621             
622             //create tree
623             Tree* tempTree = new Tree(&mytmap, sims);
624             map<string, string> empty;
625             tempTree->assembleTree(empty);
626             
627             trees.push_back(tempTree);
628             
629             //print tree
630             tempTree->print(outAll);
631         }
632         
633         outAll.close();
634         
635         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
636         
637         return trees;
638     }
639         catch(exception& e) {
640                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
641                 exit(1);
642         }
643 }
644 /**************************************************************************************************/
645
646 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
647         try {
648         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
649         
650         Unweighted unweighted(includeRoot);
651         
652         //get unweighted scores for random trees - if random is false iters = 0
653         for (int j = 0; j < iters; j++) {
654             
655             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
656             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
657             
658             if (m->control_pressed) { return 0; }
659                         
660             for(int k = 0; k < numComp; k++) {  
661                 //add trees unweighted score to map of scores
662                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
663                 if (it != rscoreFreq[k].end()) {//already have that score
664                     rscoreFreq[k][randomData[k]]++;
665                 }else{//first time we have seen this score
666                     rscoreFreq[k][randomData[k]] = 1;
667                 }
668                                 
669                 //add randoms score to validscores
670                 validScores[randomData[k]] = randomData[k];
671             }
672         }
673         
674         for(int a = 0; a < numComp; a++) {
675             float rcumul = 1.0000;
676     
677             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
678             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
679                 //make rscoreFreq map and rCumul
680                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
681                 rCumul[a][it->first] = rcumul;
682                 //get percentage of random trees with that info
683                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
684                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
685             }
686             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
687         }
688         
689         return 0;
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
693                 exit(1);
694         }
695 }
696 /***********************************************************/
697 void UnifracUnweightedCommand::printUnweightedFile() {
698         try {
699                 vector<double> data;
700                 vector<string> tags;
701                 
702                 tags.push_back("Score");
703                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
704                         
705                 for(int a = 0; a < numComp; a++) {
706                         output->initFile(groupComb[a], tags);
707                         //print each line
708                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
709                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
710                                 output->output(data);
711                                 data.clear();
712                         } 
713                         output->resetFile();
714                 }
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
718                 exit(1);
719         }
720 }
721
722 /***********************************************************/
723 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
724         try {
725                                 
726                 //format output
727                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
728                         
729                 //print each line
730
731                 for(int a = 0; a < numComp; a++) {
732                         outSum << i+1 << '\t';
733                         m->mothurOut(toString(i+1) + "\t");
734                         
735                         if (random) {
736                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
737                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
738                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
739                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
740                                 }else {
741                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
742                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
743                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
744                                 }
745                         }else{
746                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
747                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
748                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
749                         }
750                 }
751                 
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
755                 exit(1);
756         }
757 }
758 /***********************************************************/
759 void UnifracUnweightedCommand::createPhylipFile(int i) {
760         try {
761                 string phylipFileName;
762                 if ((outputForm == "lt") || (outputForm == "square")) {
763                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip.dist";
764                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
765                 }else { //column
766                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column.dist";
767                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
768                 }
769                 
770                 ofstream out;
771                 m->openOutputFile(phylipFileName, out);
772                 
773                 if ((outputForm == "lt") || (outputForm == "square")) {
774                         //output numSeqs
775                         out << m->getNumGroups() << endl;
776                 }
777                 
778                 //make matrix with scores in it
779                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
780                 for (int i = 0; i < m->getNumGroups(); i++) {
781                         dists[i].resize(m->getNumGroups(), 0.0);
782                 }
783                 
784                 //flip it so you can print it
785                 int count = 0;
786                 for (int r=0; r<m->getNumGroups(); r++) { 
787                         for (int l = 0; l < r; l++) {
788                                 dists[r][l] = utreeScores[count][0];
789                                 dists[l][r] = utreeScores[count][0];
790                                 count++;
791                         }
792                 }
793                 
794                 //output to file
795                 for (int r=0; r<m->getNumGroups(); r++) { 
796                         //output name
797                         string name = (m->getGroups())[r];
798                         if (name.length() < 10) { //pad with spaces to make compatible
799                                 while (name.length() < 10) {  name += " ";  }
800                         }
801                         
802                         if (outputForm == "lt") {
803                                 out << name << '\t';
804                         
805                                 //output distances
806                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
807                                 out << endl;
808                         }else if (outputForm == "square") {
809                                 out << name << '\t';
810                                 
811                                 //output distances
812                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
813                                 out << endl;
814                         }else{
815                                 //output distances
816                                 for (int l = 0; l < r; l++) {   
817                                         string otherName = (m->getGroups())[l];
818                                         if (otherName.length() < 10) { //pad with spaces to make compatible
819                                                 while (otherName.length() < 10) {  otherName += " ";  }
820                                         }
821                                         
822                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
823                                 }
824                         }
825                 }
826                 out.close();
827         }
828         catch(exception& e) {
829                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
830                 exit(1);
831         }
832 }
833 /***********************************************************/
834
835
836
837