2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["unweighted"] = tempOutNames;
73 outputTypes["uwsummary"] = tempOutNames;
74 outputTypes["phylip"] = tempOutNames;
75 outputTypes["column"] = tempOutNames;
76 outputTypes["tree"] = tempOutNames;
79 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
86 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
98 map<string,string>::iterator it;
100 ValidParameters validParameter;
102 //check to make sure all parameters are valid for command
103 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["unweighted"] = tempOutNames;
110 outputTypes["uwsummary"] = tempOutNames;
111 outputTypes["phylip"] = tempOutNames;
112 outputTypes["column"] = tempOutNames;
113 outputTypes["tree"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("tree");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { abort = true; }
148 else if (treefile == "not found") { //if there is a current design file, use it
149 treefile = m->getTreeFile();
150 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setTreeFile(treefile); }
154 //check for required parameters
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { m->setGroupFile(groupfile); }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { namefile = ""; abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
163 else { m->setNameFile(namefile); }
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
167 //check for optional parameter and set defaults
168 // ...at some point should added some additional type checking...
169 groups = validParameter.validFile(parameters, "groups", false);
170 if (groups == "not found") { groups = ""; }
172 m->splitAtDash(groups, Groups);
173 m->setGroups(Groups);
176 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
177 m->mothurConvert(itersString, iters);
179 string temp = validParameter.validFile(parameters, "distance", false);
180 if (temp == "not found") { phylip = false; outputForm = ""; }
182 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
183 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
186 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
187 random = m->isTrue(temp);
189 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
190 includeRoot = m->isTrue(temp);
192 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
193 m->setProcessors(temp);
194 m->mothurConvert(temp, processors);
196 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
197 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
200 else { subsample = false; }
203 if (!subsample) { subsampleIters = 0; }
204 else { subsampleIters = iters; }
206 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
207 consensus = m->isTrue(temp);
209 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
210 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
211 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
214 if (!random) { iters = 0; } //turn off random calcs
216 //if user selects distance = true and no groups it won't calc the pairwise
217 if ((phylip) && (Groups.size() == 0)) {
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 if (namefile == "") {
224 vector<string> files; files.push_back(treefile);
225 parser.getNameFile(files);
230 catch(exception& e) {
231 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
240 if (abort == true) { if (calledHelp) { return 0; } return 2; }
242 m->setTreeFile(treefile);
244 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245 T = reader->getTrees();
246 tmap = T[0]->getTreeMap();
247 map<string, string> nameMap = reader->getNames();
248 map<string, string> unique2Dup = reader->getNameMap();
251 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
252 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
253 m->openOutputFile(sumFile, outSum);
256 Groups = m->getGroups();
257 vector<string> namesGroups = tmap->getNamesOfGroups();
258 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
260 Unweighted unweighted(includeRoot);
262 int start = time(NULL);
266 //user has not set size, set size = smallest samples size
267 if (subsampleSize == -1) {
268 vector<string> temp; temp.push_back(Groups[0]);
269 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
270 for (int i = 1; i < Groups.size(); i++) {
271 temp.clear(); temp.push_back(Groups[i]);
272 int thisSize = (tmap->getNamesSeqs(temp)).size();
273 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
275 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
276 }else { //eliminate any too small groups
277 vector<string> newGroups = Groups;
279 for (int i = 0; i < newGroups.size(); i++) {
280 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
281 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
282 int thisSize = thisGroupsSeqs.size();
284 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
285 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
287 m->setGroups(Groups);
291 util.getCombos(groupComb, Groups, numComp);
292 m->setGroups(Groups);
294 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
296 if (numComp < processors) { processors = numComp; }
298 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
300 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
301 m->mothurOut("Tree#\tGroups\tUWScore\t");
302 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
303 outSum << endl; m->mothurOutEndLine();
305 //get pscores for users trees
306 for (int i = 0; i < T.size(); i++) {
307 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
312 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString);
313 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
314 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
318 //get unweighted for users tree
319 rscoreFreq.resize(numComp);
320 rCumul.resize(numComp);
321 utreeScores.resize(numComp);
322 UWScoreSig.resize(numComp);
324 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
326 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
328 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
330 //output scores for each combination
331 for(int k = 0; k < numComp; k++) {
333 utreeScores[k].push_back(userData[k]);
335 //add users score to validscores
336 validScores[userData[k]] = userData[k];
338 if (!random) { UWScoreSig[k].push_back(0.0); }
341 if (random) { runRandomCalcs(T[i], userData); }
343 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
345 int startSubsample = time(NULL);
348 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
349 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
350 if (m->control_pressed) { break; }
352 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
353 TreeMap* newTmap = new TreeMap();
354 //newTmap->getCopy(*tmap);
357 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
359 //uses method of setting groups to doNotIncludeMe
361 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
363 //call new weighted function
364 vector<double> iterData; iterData.resize(numComp,0);
365 Unweighted thisUnweighted(includeRoot);
366 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
368 //save data to make ave dist, std dist
369 calcDistsTotals.push_back(iterData);
372 delete subSampleTree;
374 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
376 m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
378 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
380 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
381 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
384 printUWSummaryFile(i);
385 if (random) { printUnweightedFile(); delete output; }
386 if (phylip) { createPhylipFile(i); }
398 for (int i = 0; i < T.size(); i++) { delete T[i]; }
400 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
402 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
404 //set phylip file as new current phylipfile
406 itTypes = outputTypes.find("phylip");
407 if (itTypes != outputTypes.end()) {
408 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
411 //set column file as new current columnfile
412 itTypes = outputTypes.find("column");
413 if (itTypes != outputTypes.end()) {
414 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
417 m->mothurOutEndLine();
418 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
419 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
420 m->mothurOutEndLine();
425 catch(exception& e) {
426 m->errorOut(e, "UnifracUnweightedCommand", "execute");
430 /**************************************************************************************************/
431 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
433 //we need to find the average distance and standard deviation for each groups distance
436 vector<double> averages; averages.resize(numComp, 0);
437 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
438 for (int i = 0; i < dists[thisIter].size(); i++) {
439 averages[i] += dists[thisIter][i];
444 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
446 //find standard deviation
447 vector<double> stdDev; stdDev.resize(numComp, 0);
449 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
450 for (int j = 0; j < dists[thisIter].size(); j++) {
451 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
454 for (int i = 0; i < stdDev.size(); i++) {
455 stdDev[i] /= (float) subsampleIters;
456 stdDev[i] = sqrt(stdDev[i]);
459 //make matrix with scores in it
460 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
461 for (int i = 0; i < m->getNumGroups(); i++) {
462 avedists[i].resize(m->getNumGroups(), 0.0);
465 //make matrix with scores in it
466 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
467 for (int i = 0; i < m->getNumGroups(); i++) {
468 stddists[i].resize(m->getNumGroups(), 0.0);
471 //flip it so you can print it
473 for (int r=0; r<m->getNumGroups(); r++) {
474 for (int l = 0; l < r; l++) {
475 avedists[r][l] = averages[count];
476 avedists[l][r] = averages[count];
477 stddists[r][l] = stdDev[count];
478 stddists[l][r] = stdDev[count];
483 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave.dist";
484 outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
487 m->openOutputFile(aveFileName, out);
489 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std.dist";
490 outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
493 m->openOutputFile(stdFileName, outStd);
495 if ((outputForm == "lt") || (outputForm == "square")) {
497 out << m->getNumGroups() << endl;
498 outStd << m->getNumGroups() << endl;
502 for (int r=0; r<m->getNumGroups(); r++) {
504 string name = (m->getGroups())[r];
505 if (name.length() < 10) { //pad with spaces to make compatible
506 while (name.length() < 10) { name += " "; }
509 if (outputForm == "lt") {
511 outStd << name << '\t';
514 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
515 out << endl; outStd << endl;
516 }else if (outputForm == "square") {
518 outStd << name << '\t';
521 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
522 out << endl; outStd << endl;
525 for (int l = 0; l < r; l++) {
526 string otherName = (m->getGroups())[l];
527 if (otherName.length() < 10) { //pad with spaces to make compatible
528 while (otherName.length() < 10) { otherName += " "; }
531 out << name << '\t' << otherName << avedists[r][l] << endl;
532 outStd << name << '\t' << otherName << stddists[r][l] << endl;
541 catch(exception& e) {
542 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
547 /**************************************************************************************************/
548 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
551 //used in tree constructor
554 //create treemap class from groupmap for tree class to use
556 newTmap.makeSim(m->getGroups());
558 //clear old tree names if any
559 m->Treenames.clear();
561 //fills globaldatas tree names
562 m->Treenames = m->getGroups();
564 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
566 if (m->control_pressed) { return 0; }
569 Tree* conTree = con.getTree(newTrees);
571 //create a new filename
572 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";
573 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
575 m->openOutputFile(conFile, outTree);
577 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
583 catch(exception& e) {
584 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
588 /**************************************************************************************************/
590 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
595 //create a new filename
596 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";
597 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
600 m->openOutputFile(outputFile, outAll);
603 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
605 if (m->control_pressed) { break; }
607 //make matrix with scores in it
608 vector< vector<double> > sims; sims.resize(m->getNumGroups());
609 for (int j = 0; j < m->getNumGroups(); j++) {
610 sims[j].resize(m->getNumGroups(), 0.0);
614 for (int r=0; r<m->getNumGroups(); r++) {
615 for (int l = 0; l < r; l++) {
616 double sim = -(dists[i][count]-1.0);
624 Tree* tempTree = new Tree(&mytmap, sims);
625 map<string, string> empty;
626 tempTree->assembleTree(empty);
628 trees.push_back(tempTree);
631 tempTree->print(outAll);
636 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
640 catch(exception& e) {
641 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
645 /**************************************************************************************************/
647 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
649 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
651 Unweighted unweighted(includeRoot);
653 //get unweighted scores for random trees - if random is false iters = 0
654 for (int j = 0; j < iters; j++) {
656 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
657 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
659 if (m->control_pressed) { return 0; }
661 for(int k = 0; k < numComp; k++) {
662 //add trees unweighted score to map of scores
663 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
664 if (it != rscoreFreq[k].end()) {//already have that score
665 rscoreFreq[k][randomData[k]]++;
666 }else{//first time we have seen this score
667 rscoreFreq[k][randomData[k]] = 1;
670 //add randoms score to validscores
671 validScores[randomData[k]] = randomData[k];
675 for(int a = 0; a < numComp; a++) {
676 float rcumul = 1.0000;
678 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
679 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
680 //make rscoreFreq map and rCumul
681 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
682 rCumul[a][it->first] = rcumul;
683 //get percentage of random trees with that info
684 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
685 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
687 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
692 catch(exception& e) {
693 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
697 /***********************************************************/
698 void UnifracUnweightedCommand::printUnweightedFile() {
703 tags.push_back("Score");
704 tags.push_back("RandFreq"); tags.push_back("RandCumul");
706 for(int a = 0; a < numComp; a++) {
707 output->initFile(groupComb[a], tags);
709 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
710 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
711 output->output(data);
717 catch(exception& e) {
718 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
723 /***********************************************************/
724 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
728 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
732 for(int a = 0; a < numComp; a++) {
733 outSum << i+1 << '\t';
734 m->mothurOut(toString(i+1) + "\t");
737 if (UWScoreSig[a][0] > (1/(float)iters)) {
738 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
739 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
740 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
742 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
743 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
744 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
747 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
748 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
749 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
754 catch(exception& e) {
755 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
759 /***********************************************************/
760 void UnifracUnweightedCommand::createPhylipFile(int i) {
762 string phylipFileName;
763 if ((outputForm == "lt") || (outputForm == "square")) {
764 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist";
765 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
767 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist";
768 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
772 m->openOutputFile(phylipFileName, out);
774 if ((outputForm == "lt") || (outputForm == "square")) {
776 out << m->getNumGroups() << endl;
779 //make matrix with scores in it
780 vector< vector<float> > dists; dists.resize(m->getNumGroups());
781 for (int i = 0; i < m->getNumGroups(); i++) {
782 dists[i].resize(m->getNumGroups(), 0.0);
785 //flip it so you can print it
787 for (int r=0; r<m->getNumGroups(); r++) {
788 for (int l = 0; l < r; l++) {
789 dists[r][l] = utreeScores[count][0];
790 dists[l][r] = utreeScores[count][0];
796 for (int r=0; r<m->getNumGroups(); r++) {
798 string name = (m->getGroups())[r];
799 if (name.length() < 10) { //pad with spaces to make compatible
800 while (name.length() < 10) { name += " "; }
803 if (outputForm == "lt") {
807 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
809 }else if (outputForm == "square") {
813 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
817 for (int l = 0; l < r; l++) {
818 string otherName = (m->getGroups())[l];
819 if (otherName.length() < 10) { //pad with spaces to make compatible
820 while (otherName.length() < 10) { otherName += " "; }
823 out << name << '\t' << otherName << dists[r][l] << endl;
829 catch(exception& e) {
830 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
834 /***********************************************************/