2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["unweighted"] = tempOutNames;
73 outputTypes["uwsummary"] = tempOutNames;
74 outputTypes["phylip"] = tempOutNames;
75 outputTypes["column"] = tempOutNames;
76 outputTypes["tree"] = tempOutNames;
79 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
86 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
98 map<string,string>::iterator it;
100 ValidParameters validParameter;
102 //check to make sure all parameters are valid for command
103 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["unweighted"] = tempOutNames;
110 outputTypes["uwsummary"] = tempOutNames;
111 outputTypes["phylip"] = tempOutNames;
112 outputTypes["column"] = tempOutNames;
113 outputTypes["tree"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("tree");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { abort = true; }
148 else if (treefile == "not found") { //if there is a current design file, use it
149 treefile = m->getTreeFile();
150 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setTreeFile(treefile); }
154 //check for required parameters
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { m->setGroupFile(groupfile); }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { namefile = ""; abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
163 else { m->setNameFile(namefile); }
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
167 //check for optional parameter and set defaults
168 // ...at some point should added some additional type checking...
169 groups = validParameter.validFile(parameters, "groups", false);
170 if (groups == "not found") { groups = ""; }
172 m->splitAtDash(groups, Groups);
173 m->setGroups(Groups);
176 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
177 m->mothurConvert(itersString, iters);
179 string temp = validParameter.validFile(parameters, "distance", false);
180 if (temp == "not found") { phylip = false; outputForm = ""; }
182 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
183 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
186 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
187 random = m->isTrue(temp);
189 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
190 includeRoot = m->isTrue(temp);
192 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
193 m->setProcessors(temp);
194 m->mothurConvert(temp, processors);
196 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
197 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
200 else { subsample = false; }
203 if (!subsample) { subsampleIters = 0; }
204 else { subsampleIters = iters; }
206 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
207 consensus = m->isTrue(temp);
209 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
210 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
211 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
214 if (!random) { iters = 0; } //turn off random calcs
216 //if user selects distance = true and no groups it won't calc the pairwise
217 if ((phylip) && (Groups.size() == 0)) {
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 if (namefile == "") {
224 vector<string> files; files.push_back(treefile);
225 parser.getNameFile(files);
230 catch(exception& e) {
231 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
240 if (abort == true) { if (calledHelp) { return 0; } return 2; }
242 m->setTreeFile(treefile);
244 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245 T = reader->getTrees();
246 tmap = T[0]->getTreeMap();
247 map<string, string> nameMap = reader->getNames();
248 map<string, string> unique2Dup = reader->getNameMap();
251 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
252 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
253 m->openOutputFile(sumFile, outSum);
256 Groups = m->getGroups();
257 vector<string> namesGroups = tmap->getNamesOfGroups();
258 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
260 Unweighted unweighted(includeRoot);
262 int start = time(NULL);
266 //user has not set size, set size = smallest samples size
267 if (subsampleSize == -1) {
268 vector<string> temp; temp.push_back(Groups[0]);
269 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
270 for (int i = 1; i < Groups.size(); i++) {
271 temp.clear(); temp.push_back(Groups[i]);
272 int thisSize = (tmap->getNamesSeqs(temp)).size();
273 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
275 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
276 }else { //eliminate any too small groups
277 vector<string> newGroups = Groups;
279 for (int i = 0; i < newGroups.size(); i++) {
280 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
281 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
282 int thisSize = thisGroupsSeqs.size();
284 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
285 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
287 m->setGroups(Groups);
291 util.getCombos(groupComb, Groups, numComp);
292 m->setGroups(Groups);
294 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
296 if (numComp < processors) { processors = numComp; }
298 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
300 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
301 m->mothurOut("Tree#\tGroups\tUWScore\t");
302 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
303 outSum << endl; m->mothurOutEndLine();
305 //get pscores for users trees
306 for (int i = 0; i < T.size(); i++) {
307 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
312 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString);
313 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
314 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
318 //get unweighted for users tree
319 rscoreFreq.resize(numComp);
320 rCumul.resize(numComp);
321 utreeScores.resize(numComp);
322 UWScoreSig.resize(numComp);
324 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
326 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
328 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
330 //output scores for each combination
331 for(int k = 0; k < numComp; k++) {
333 utreeScores[k].push_back(userData[k]);
335 //add users score to validscores
336 validScores[userData[k]] = userData[k];
338 if (!random) { UWScoreSig[k].push_back(0.0); }
341 if (random) { runRandomCalcs(T[i], userData); }
343 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
346 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
347 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
349 if (m->control_pressed) { break; }
351 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
352 TreeMap* newTmap = new TreeMap();
353 newTmap->getCopy(*tmap);
356 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
358 //uses method of setting groups to doNotIncludeMe
360 //Tree* newTree2 = sample.getSample(T[i], newTmap, nameMap, subsampleSize, unique2Dup);
361 // newTree2->print(cout);
363 //call new weighted function
364 vector<double> iterData; iterData.resize(numComp,0);
365 Unweighted thisUnweighted(includeRoot);
366 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
368 //save data to make ave dist, std dist
369 calcDistsTotals.push_back(iterData);
372 delete subSampleTree;
374 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
377 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
379 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
380 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
383 printUWSummaryFile(i);
384 if (random) { printUnweightedFile(); delete output; }
385 if (phylip) { createPhylipFile(i); }
397 for (int i = 0; i < T.size(); i++) { delete T[i]; }
399 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
401 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
403 //set phylip file as new current phylipfile
405 itTypes = outputTypes.find("phylip");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
410 //set column file as new current columnfile
411 itTypes = outputTypes.find("column");
412 if (itTypes != outputTypes.end()) {
413 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
416 m->mothurOutEndLine();
417 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
418 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
419 m->mothurOutEndLine();
424 catch(exception& e) {
425 m->errorOut(e, "UnifracUnweightedCommand", "execute");
429 /**************************************************************************************************/
430 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
432 //we need to find the average distance and standard deviation for each groups distance
435 vector<double> averages; averages.resize(numComp, 0);
436 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
437 for (int i = 0; i < dists[thisIter].size(); i++) {
438 averages[i] += dists[thisIter][i];
443 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
445 //find standard deviation
446 vector<double> stdDev; stdDev.resize(numComp, 0);
448 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
449 for (int j = 0; j < dists[thisIter].size(); j++) {
450 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
453 for (int i = 0; i < stdDev.size(); i++) {
454 stdDev[i] /= (float) subsampleIters;
455 stdDev[i] = sqrt(stdDev[i]);
458 //make matrix with scores in it
459 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
460 for (int i = 0; i < m->getNumGroups(); i++) {
461 avedists[i].resize(m->getNumGroups(), 0.0);
464 //make matrix with scores in it
465 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
466 for (int i = 0; i < m->getNumGroups(); i++) {
467 stddists[i].resize(m->getNumGroups(), 0.0);
470 //flip it so you can print it
472 for (int r=0; r<m->getNumGroups(); r++) {
473 for (int l = 0; l < r; l++) {
474 avedists[r][l] = averages[count];
475 avedists[l][r] = averages[count];
476 stddists[r][l] = stdDev[count];
477 stddists[l][r] = stdDev[count];
482 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave.dist";
483 outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
486 m->openOutputFile(aveFileName, out);
488 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std.dist";
489 outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
492 m->openOutputFile(stdFileName, outStd);
494 if ((outputForm == "lt") || (outputForm == "square")) {
496 out << m->getNumGroups() << endl;
497 outStd << m->getNumGroups() << endl;
501 for (int r=0; r<m->getNumGroups(); r++) {
503 string name = (m->getGroups())[r];
504 if (name.length() < 10) { //pad with spaces to make compatible
505 while (name.length() < 10) { name += " "; }
508 if (outputForm == "lt") {
510 outStd << name << '\t';
513 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
514 out << endl; outStd << endl;
515 }else if (outputForm == "square") {
517 outStd << name << '\t';
520 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
521 out << endl; outStd << endl;
524 for (int l = 0; l < r; l++) {
525 string otherName = (m->getGroups())[l];
526 if (otherName.length() < 10) { //pad with spaces to make compatible
527 while (otherName.length() < 10) { otherName += " "; }
530 out << name << '\t' << otherName << avedists[r][l] << endl;
531 outStd << name << '\t' << otherName << stddists[r][l] << endl;
540 catch(exception& e) {
541 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
546 /**************************************************************************************************/
547 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
550 //used in tree constructor
553 //create treemap class from groupmap for tree class to use
555 newTmap.makeSim(m->getGroups());
557 //clear old tree names if any
558 m->Treenames.clear();
560 //fills globaldatas tree names
561 m->Treenames = m->getGroups();
563 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
565 if (m->control_pressed) { return 0; }
568 Tree* conTree = con.getTree(newTrees);
570 //create a new filename
571 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";
572 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
574 m->openOutputFile(conFile, outTree);
576 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
582 catch(exception& e) {
583 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
587 /**************************************************************************************************/
589 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
594 //create a new filename
595 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";
596 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
599 m->openOutputFile(outputFile, outAll);
602 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
604 if (m->control_pressed) { break; }
606 //make matrix with scores in it
607 vector< vector<double> > sims; sims.resize(m->getNumGroups());
608 for (int j = 0; j < m->getNumGroups(); j++) {
609 sims[j].resize(m->getNumGroups(), 0.0);
613 for (int r=0; r<m->getNumGroups(); r++) {
614 for (int l = 0; l < r; l++) {
615 double sim = -(dists[i][count]-1.0);
623 Tree* tempTree = new Tree(&mytmap, sims);
624 map<string, string> empty;
625 tempTree->assembleTree(empty);
627 trees.push_back(tempTree);
630 tempTree->print(outAll);
635 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
639 catch(exception& e) {
640 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
644 /**************************************************************************************************/
646 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
648 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
650 Unweighted unweighted(includeRoot);
652 //get unweighted scores for random trees - if random is false iters = 0
653 for (int j = 0; j < iters; j++) {
655 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
656 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
658 if (m->control_pressed) { return 0; }
660 for(int k = 0; k < numComp; k++) {
661 //add trees unweighted score to map of scores
662 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
663 if (it != rscoreFreq[k].end()) {//already have that score
664 rscoreFreq[k][randomData[k]]++;
665 }else{//first time we have seen this score
666 rscoreFreq[k][randomData[k]] = 1;
669 //add randoms score to validscores
670 validScores[randomData[k]] = randomData[k];
674 for(int a = 0; a < numComp; a++) {
675 float rcumul = 1.0000;
677 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
678 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
679 //make rscoreFreq map and rCumul
680 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
681 rCumul[a][it->first] = rcumul;
682 //get percentage of random trees with that info
683 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
684 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
686 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
691 catch(exception& e) {
692 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
696 /***********************************************************/
697 void UnifracUnweightedCommand::printUnweightedFile() {
702 tags.push_back("Score");
703 tags.push_back("RandFreq"); tags.push_back("RandCumul");
705 for(int a = 0; a < numComp; a++) {
706 output->initFile(groupComb[a], tags);
708 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
709 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
710 output->output(data);
716 catch(exception& e) {
717 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
722 /***********************************************************/
723 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
727 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
731 for(int a = 0; a < numComp; a++) {
732 outSum << i+1 << '\t';
733 m->mothurOut(toString(i+1) + "\t");
736 if (UWScoreSig[a][0] > (1/(float)iters)) {
737 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
738 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
739 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
741 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
742 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
743 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
746 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
747 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
748 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
753 catch(exception& e) {
754 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
758 /***********************************************************/
759 void UnifracUnweightedCommand::createPhylipFile(int i) {
761 string phylipFileName;
762 if ((outputForm == "lt") || (outputForm == "square")) {
763 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist";
764 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
766 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist";
767 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
771 m->openOutputFile(phylipFileName, out);
773 if ((outputForm == "lt") || (outputForm == "square")) {
775 out << m->getNumGroups() << endl;
778 //make matrix with scores in it
779 vector< vector<float> > dists; dists.resize(m->getNumGroups());
780 for (int i = 0; i < m->getNumGroups(); i++) {
781 dists[i].resize(m->getNumGroups(), 0.0);
784 //flip it so you can print it
786 for (int r=0; r<m->getNumGroups(); r++) {
787 for (int l = 0; l < r; l++) {
788 dists[r][l] = utreeScores[count][0];
789 dists[l][r] = utreeScores[count][0];
795 for (int r=0; r<m->getNumGroups(); r++) {
797 string name = (m->getGroups())[r];
798 if (name.length() < 10) { //pad with spaces to make compatible
799 while (name.length() < 10) { name += " "; }
802 if (outputForm == "lt") {
806 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
808 }else if (outputForm == "square") {
812 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
816 for (int l = 0; l < r; l++) {
817 string otherName = (m->getGroups())[l];
818 if (otherName.length() < 10) { //pad with spaces to make compatible
819 while (otherName.length() < 10) { otherName += " "; }
822 out << name << '\t' << otherName << dists[r][l] << endl;
828 catch(exception& e) {
829 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
833 /***********************************************************/