]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
fixed path redirect issue in shhh.flows. optimized the subsampling in unifrac commands
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
14
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){       
17         try {
18                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24                 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25                 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27         CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28         CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
46                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
47                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49                 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50                 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52                 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54         helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55                 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56                 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57                 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["unweighted"] = tempOutNames;
73                 outputTypes["uwsummary"] = tempOutNames;
74                 outputTypes["phylip"] = tempOutNames;
75                 outputTypes["column"] = tempOutNames;
76         outputTypes["tree"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
80                 exit(1);
81         }
82 }
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;   
87                 
88                         
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         map<string,string>::iterator it;
99                         
100                         ValidParameters validParameter;
101                 
102                         //check to make sure all parameters are valid for command
103                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["unweighted"] = tempOutNames;
110                         outputTypes["uwsummary"] = tempOutNames;
111                         outputTypes["phylip"] = tempOutNames;
112                         outputTypes["column"] = tempOutNames;
113             outputTypes["tree"] = tempOutNames;
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("tree");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
126                                 }
127                                 
128                                 it = parameters.find("group");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("name");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145             //check for required parameters
146                         treefile = validParameter.validFile(parameters, "tree", true);
147                         if (treefile == "not open") { abort = true; }
148                         else if (treefile == "not found") {                             //if there is a current design file, use it
149                                 treefile = m->getTreeFile(); 
150                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }                                                               
152                         }else { m->setTreeFile(treefile); }     
153                         
154                         //check for required parameters
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") { abort = true; }
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else { m->setGroupFile(groupfile); }
159                         
160                         namefile = validParameter.validFile(parameters, "name", true);
161                         if (namefile == "not open") { namefile = ""; abort = true; }
162                         else if (namefile == "not found") { namefile = ""; }
163                         else { m->setNameFile(namefile); }
164                         
165                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
166                         
167                         //check for optional parameter and set defaults
168                         // ...at some point should added some additional type checking...
169                         groups = validParameter.validFile(parameters, "groups", false);                 
170                         if (groups == "not found") { groups = ""; }
171                         else { 
172                                 m->splitAtDash(groups, Groups);
173                                 m->setGroups(Groups);
174                         }
175                                 
176                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
177                         m->mothurConvert(itersString, iters); 
178                         
179                         string temp = validParameter.validFile(parameters, "distance", false);                  
180                         if (temp == "not found") { phylip = false; outputForm = ""; }
181                         else{
182                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
183                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
184                         }
185                         
186                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
187                         random = m->isTrue(temp);
188                         
189                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
190                         includeRoot = m->isTrue(temp);
191                         
192                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
193                         m->setProcessors(temp);
194                         m->mothurConvert(temp, processors); 
195                         
196             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
197                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198             else {  
199                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
200                 else { subsample = false; }
201             }
202                         
203             if (!subsample) { subsampleIters = 0;   }
204             else { subsampleIters = iters;          }
205             
206             temp = validParameter.validFile(parameters, "consensus", false);                                    if (temp == "not found") { temp = "F"; }
207                         consensus = m->isTrue(temp);
208             
209                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
210                         if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
211             if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212             if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
213
214                         if (!random) {  iters = 0;  } //turn off random calcs
215                         
216                         //if user selects distance = true and no groups it won't calc the pairwise
217                         if ((phylip) && (Groups.size() == 0)) {
218                                 groups = "all";
219                                 m->splitAtDash(groups, Groups);
220                                 m->setGroups(Groups);
221                         }
222                         
223                         if (namefile == "") {
224                                 vector<string> files; files.push_back(treefile);
225                                 parser.getNameFile(files);
226                         }
227                 }
228                 
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
232                 exit(1);
233         }
234 }
235
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
238         try {
239                 
240                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
241                 
242                 m->setTreeFile(treefile);
243                 
244                 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245         T = reader->getTrees();
246         tmap = T[0]->getTreeMap();
247         map<string, string> nameMap = reader->getNames();
248         map<string, string> unique2Dup = reader->getNameMap();
249         delete reader;  
250         
251                 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
252                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
253                 m->openOutputFile(sumFile, outSum);
254                 
255                 SharedUtil util;
256                 Groups = m->getGroups();
257                 vector<string> namesGroups = tmap->getNamesOfGroups();
258                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
259                 
260                 Unweighted unweighted(includeRoot);
261                 
262                 int start = time(NULL);
263         
264         //set or check size
265         if (subsample) {
266             //user has not set size, set size = smallest samples size
267             if (subsampleSize == -1) { 
268                 vector<string> temp; temp.push_back(Groups[0]);
269                 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
270                 for (int i = 1; i < Groups.size(); i++) {
271                     temp.clear(); temp.push_back(Groups[i]);
272                     int thisSize = (tmap->getNamesSeqs(temp)).size();
273                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
274                 }
275                 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
276             }else { //eliminate any too small groups
277                 vector<string> newGroups = Groups;
278                 Groups.clear();
279                 for (int i = 0; i < newGroups.size(); i++) {
280                     vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
281                     vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
282                     int thisSize = thisGroupsSeqs.size();
283                     
284                     if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]); }
285                     else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
286                 } 
287                 m->setGroups(Groups);
288             }
289         }
290                 
291         util.getCombos(groupComb, Groups, numComp);
292                 m->setGroups(Groups);
293         
294                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
295         
296                 if (numComp < processors) { processors = numComp;  }
297         
298         if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
299                 
300                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t';
301                 m->mothurOut("Tree#\tGroups\tUWScore\t");
302                 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
303                 outSum << endl; m->mothurOutEndLine();
304          
305                 //get pscores for users trees
306                 for (int i = 0; i < T.size(); i++) {
307                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
308                         
309             counter = 0;
310                         
311                         if (random)  {  
312                                 output = new ColumnFile(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted", itersString);
313                                 outputNames.push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
314                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted");
315                         }
316                         
317                         
318                         //get unweighted for users tree
319                         rscoreFreq.resize(numComp);  
320                         rCumul.resize(numComp);  
321                         utreeScores.resize(numComp);  
322                         UWScoreSig.resize(numComp); 
323             
324             vector<double> userData; userData.resize(numComp,0);  //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
325
326                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
327                 
328                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
329                         
330                         //output scores for each combination
331                         for(int k = 0; k < numComp; k++) {
332                                 //saves users score
333                                 utreeScores[k].push_back(userData[k]);
334                                 
335                                 //add users score to validscores
336                                 validScores[userData[k]] = userData[k];
337                 
338                 if (!random) { UWScoreSig[k].push_back(0.0);    }
339                         }
340             
341             if (random) {  runRandomCalcs(T[i], userData);  }
342                         
343                         if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
344             
345             int startSubsample = time(NULL);
346             
347             //subsample loop
348             vector< vector<double> > calcDistsTotals;  //each iter, each groupCombos dists. this will be used to make .dist files
349             for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
350                 if (m->control_pressed) { break; }
351                 
352                 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
353                 TreeMap* newTmap = new TreeMap();
354                 //newTmap->getCopy(*tmap);
355                 
356                 //SubSample sample;
357                 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
358                 
359                 //uses method of setting groups to doNotIncludeMe
360                 SubSample sample;
361                 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
362                 
363                 //call new weighted function
364                 vector<double> iterData; iterData.resize(numComp,0);
365                 Unweighted thisUnweighted(includeRoot);
366                 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
367                 
368                 //save data to make ave dist, std dist
369                 calcDistsTotals.push_back(iterData);
370                 
371                 delete newTmap;
372                 delete subSampleTree;
373                 
374                 if((thisIter+1) % 100 == 0){    m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
375             }
376             m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
377             
378             if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0;  }
379
380             if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
381             if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
382             
383             //print output files
384                         printUWSummaryFile(i);
385                         if (random)  {  printUnweightedFile();  delete output;  }
386                         if (phylip) {   createPhylipFile(i);            }
387                         
388                         rscoreFreq.clear(); 
389                         rCumul.clear();  
390                         validScores.clear(); 
391                         utreeScores.clear();  
392                         UWScoreSig.clear(); 
393                 }
394                 
395
396                 outSum.close();
397                 delete tmap; 
398                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
399                 
400                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
401                 
402                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
403                 
404                 //set phylip file as new current phylipfile
405                 string current = "";
406                 itTypes = outputTypes.find("phylip");
407                 if (itTypes != outputTypes.end()) {
408                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
409                 }
410                 
411                 //set column file as new current columnfile
412                 itTypes = outputTypes.find("column");
413                 if (itTypes != outputTypes.end()) {
414                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
415                 }
416                 
417                 m->mothurOutEndLine();
418                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
419                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
420                 m->mothurOutEndLine();
421                 
422                 return 0;
423                 
424         }
425         catch(exception& e) {
426                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
427                 exit(1);
428         }
429 }
430 /**************************************************************************************************/
431 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
432         try {
433         //we need to find the average distance and standard deviation for each groups distance
434         
435         //finds sum
436         vector<double> averages; averages.resize(numComp, 0); 
437         for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
438             for (int i = 0; i < dists[thisIter].size(); i++) {  
439                 averages[i] += dists[thisIter][i];
440             }
441         }
442         
443         //finds average.
444         for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
445         
446         //find standard deviation
447         vector<double> stdDev; stdDev.resize(numComp, 0);
448         
449         for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
450             for (int j = 0; j < dists[thisIter].size(); j++) {
451                 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
452             }
453         }
454         for (int i = 0; i < stdDev.size(); i++) {  
455             stdDev[i] /= (float) subsampleIters; 
456             stdDev[i] = sqrt(stdDev[i]);
457         }
458         
459         //make matrix with scores in it
460         vector< vector<double> > avedists;      avedists.resize(m->getNumGroups());
461         for (int i = 0; i < m->getNumGroups(); i++) {
462             avedists[i].resize(m->getNumGroups(), 0.0);
463         }
464         
465         //make matrix with scores in it
466         vector< vector<double> > stddists;      stddists.resize(m->getNumGroups());
467         for (int i = 0; i < m->getNumGroups(); i++) {
468             stddists[i].resize(m->getNumGroups(), 0.0);
469         }
470         
471         //flip it so you can print it
472         int count = 0;
473         for (int r=0; r<m->getNumGroups(); r++) { 
474             for (int l = 0; l < r; l++) {
475                 avedists[r][l] = averages[count];
476                 avedists[l][r] = averages[count];
477                 stddists[r][l] = stdDev[count];
478                 stddists[l][r] = stdDev[count];
479                 count++;
480             }
481         }
482         
483         string aveFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.ave.dist";
484         outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); 
485         
486         ofstream out;
487         m->openOutputFile(aveFileName, out);
488         
489         string stdFileName = outputDir + m->getSimpleName(treefile)  + toString(treeNum+1) + ".unweighted.std.dist";
490         outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); 
491         
492         ofstream outStd;
493         m->openOutputFile(stdFileName, outStd);
494         
495         if ((outputForm == "lt") || (outputForm == "square")) {
496             //output numSeqs
497             out << m->getNumGroups() << endl;
498             outStd << m->getNumGroups() << endl;
499         }
500         
501         //output to file
502         for (int r=0; r<m->getNumGroups(); r++) { 
503             //output name
504             string name = (m->getGroups())[r];
505             if (name.length() < 10) { //pad with spaces to make compatible
506                 while (name.length() < 10) {  name += " ";  }
507             }
508             
509             if (outputForm == "lt") {
510                 out << name << '\t';
511                 outStd << name << '\t';
512                 
513                 //output distances
514                 for (int l = 0; l < r; l++) {   out  << avedists[r][l] << '\t';  outStd  << stddists[r][l] << '\t';}
515                 out << endl;  outStd << endl;
516             }else if (outputForm == "square") {
517                 out << name << '\t';
518                 outStd << name << '\t';
519                 
520                 //output distances
521                 for (int l = 0; l < m->getNumGroups(); l++) {   out  << avedists[r][l] << '\t'; outStd  << stddists[r][l] << '\t'; }
522                 out << endl; outStd << endl;
523             }else{
524                 //output distances
525                 for (int l = 0; l < r; l++) {   
526                     string otherName = (m->getGroups())[l];
527                     if (otherName.length() < 10) { //pad with spaces to make compatible
528                         while (otherName.length() < 10) {  otherName += " ";  }
529                     }
530                     
531                     out  << name << '\t' << otherName << avedists[r][l] << endl;  
532                     outStd  << name << '\t' << otherName << stddists[r][l] << endl; 
533                 }
534             }
535         }
536         out.close();
537         outStd.close();
538         
539         return 0;
540     }
541         catch(exception& e) {
542                 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
543                 exit(1);
544         }
545 }
546
547 /**************************************************************************************************/
548 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
549         try {
550         
551         //used in tree constructor 
552         m->runParse = false;
553         
554         //create treemap class from groupmap for tree class to use
555         TreeMap newTmap;
556         newTmap.makeSim(m->getGroups());
557         
558         //clear  old tree names if any
559         m->Treenames.clear();
560         
561         //fills globaldatas tree names
562         m->Treenames = m->getGroups();
563         
564         vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
565         
566         if (m->control_pressed) { return 0; }
567         
568         Consensus con;
569         Tree* conTree = con.getTree(newTrees);
570         
571         //create a new filename
572         string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";                         
573         outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); 
574         ofstream outTree;
575         m->openOutputFile(conFile, outTree);
576         
577         if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
578         outTree.close();
579         
580         return 0;
581         
582     }
583         catch(exception& e) {
584                 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
585                 exit(1);
586         }
587 }
588 /**************************************************************************************************/
589
590 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
591         try {
592         
593         vector<Tree*> trees;
594         
595         //create a new filename
596         string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";                               
597         outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); 
598         
599         ofstream outAll;
600         m->openOutputFile(outputFile, outAll);
601         
602         
603         for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
604             
605             if (m->control_pressed) { break; }
606             
607             //make matrix with scores in it
608             vector< vector<double> > sims;      sims.resize(m->getNumGroups());
609             for (int j = 0; j < m->getNumGroups(); j++) {
610                 sims[j].resize(m->getNumGroups(), 0.0);
611             }
612             
613             int count = 0;
614                         for (int r=0; r<m->getNumGroups(); r++) { 
615                                 for (int l = 0; l < r; l++) {
616                     double sim = -(dists[i][count]-1.0);
617                                         sims[r][l] = sim;
618                                         sims[l][r] = sim;
619                                         count++;
620                                 }
621                         }
622             
623             //create tree
624             Tree* tempTree = new Tree(&mytmap, sims);
625             map<string, string> empty;
626             tempTree->assembleTree(empty);
627             
628             trees.push_back(tempTree);
629             
630             //print tree
631             tempTree->print(outAll);
632         }
633         
634         outAll.close();
635         
636         if (m->control_pressed) {  for (int i = 0; i < trees.size(); i++) {  delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
637         
638         return trees;
639     }
640         catch(exception& e) {
641                 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
642                 exit(1);
643         }
644 }
645 /**************************************************************************************************/
646
647 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
648         try {
649         vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
650         
651         Unweighted unweighted(includeRoot);
652         
653         //get unweighted scores for random trees - if random is false iters = 0
654         for (int j = 0; j < iters; j++) {
655             
656             //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
657             randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
658             
659             if (m->control_pressed) { return 0; }
660                         
661             for(int k = 0; k < numComp; k++) {  
662                 //add trees unweighted score to map of scores
663                 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
664                 if (it != rscoreFreq[k].end()) {//already have that score
665                     rscoreFreq[k][randomData[k]]++;
666                 }else{//first time we have seen this score
667                     rscoreFreq[k][randomData[k]] = 1;
668                 }
669                                 
670                 //add randoms score to validscores
671                 validScores[randomData[k]] = randomData[k];
672             }
673         }
674         
675         for(int a = 0; a < numComp; a++) {
676             float rcumul = 1.0000;
677     
678             //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
679             for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
680                 //make rscoreFreq map and rCumul
681                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
682                 rCumul[a][it->first] = rcumul;
683                 //get percentage of random trees with that info
684                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
685                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
686             }
687             UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
688         }
689         
690         return 0;
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
694                 exit(1);
695         }
696 }
697 /***********************************************************/
698 void UnifracUnweightedCommand::printUnweightedFile() {
699         try {
700                 vector<double> data;
701                 vector<string> tags;
702                 
703                 tags.push_back("Score");
704                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
705                         
706                 for(int a = 0; a < numComp; a++) {
707                         output->initFile(groupComb[a], tags);
708                         //print each line
709                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
710                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
711                                 output->output(data);
712                                 data.clear();
713                         } 
714                         output->resetFile();
715                 }
716         }
717         catch(exception& e) {
718                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
719                 exit(1);
720         }
721 }
722
723 /***********************************************************/
724 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
725         try {
726                                 
727                 //format output
728                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
729                         
730                 //print each line
731
732                 for(int a = 0; a < numComp; a++) {
733                         outSum << i+1 << '\t';
734                         m->mothurOut(toString(i+1) + "\t");
735                         
736                         if (random) {
737                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
738                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
739                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
740                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
741                                 }else {
742                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
743                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
744                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
745                                 }
746                         }else{
747                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0]  << endl;
748                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0]  << endl; 
749                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0]) + "\n");
750                         }
751                 }
752                 
753         }
754         catch(exception& e) {
755                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
756                 exit(1);
757         }
758 }
759 /***********************************************************/
760 void UnifracUnweightedCommand::createPhylipFile(int i) {
761         try {
762                 string phylipFileName;
763                 if ((outputForm == "lt") || (outputForm == "square")) {
764                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.phylip.dist";
765                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
766                 }else { //column
767                         phylipFileName = outputDir + m->getSimpleName(treefile)  + toString(i+1) + ".unweighted.column.dist";
768                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
769                 }
770                 
771                 ofstream out;
772                 m->openOutputFile(phylipFileName, out);
773                 
774                 if ((outputForm == "lt") || (outputForm == "square")) {
775                         //output numSeqs
776                         out << m->getNumGroups() << endl;
777                 }
778                 
779                 //make matrix with scores in it
780                 vector< vector<float> > dists;  dists.resize(m->getNumGroups());
781                 for (int i = 0; i < m->getNumGroups(); i++) {
782                         dists[i].resize(m->getNumGroups(), 0.0);
783                 }
784                 
785                 //flip it so you can print it
786                 int count = 0;
787                 for (int r=0; r<m->getNumGroups(); r++) { 
788                         for (int l = 0; l < r; l++) {
789                                 dists[r][l] = utreeScores[count][0];
790                                 dists[l][r] = utreeScores[count][0];
791                                 count++;
792                         }
793                 }
794                 
795                 //output to file
796                 for (int r=0; r<m->getNumGroups(); r++) { 
797                         //output name
798                         string name = (m->getGroups())[r];
799                         if (name.length() < 10) { //pad with spaces to make compatible
800                                 while (name.length() < 10) {  name += " ";  }
801                         }
802                         
803                         if (outputForm == "lt") {
804                                 out << name << '\t';
805                         
806                                 //output distances
807                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
808                                 out << endl;
809                         }else if (outputForm == "square") {
810                                 out << name << '\t';
811                                 
812                                 //output distances
813                                 for (int l = 0; l < m->getNumGroups(); l++) {   out << dists[r][l] << '\t';  }
814                                 out << endl;
815                         }else{
816                                 //output distances
817                                 for (int l = 0; l < r; l++) {   
818                                         string otherName = (m->getGroups())[l];
819                                         if (otherName.length() < 10) { //pad with spaces to make compatible
820                                                 while (otherName.length() < 10) {  otherName += " ";  }
821                                         }
822                                         
823                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
824                                 }
825                         }
826                 }
827                 out.close();
828         }
829         catch(exception& e) {
830                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
831                 exit(1);
832         }
833 }
834 /***********************************************************/
835
836
837
838