2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){
69 string outputFileName = "";
70 map<string, vector<string> >::iterator it;
72 //is this a type this command creates
73 it = outputTypes.find(type);
74 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76 if (type == "unweighted") { outputFileName = "unweighted"; }
77 else if (type == "uwsummary") { outputFileName = "uwsummary"; }
78 else if (type == "phylip") { outputFileName = "dist"; }
79 else if (type == "column") { outputFileName = "dist"; }
80 else if (type == "tree") { outputFileName = "tre"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
83 return outputFileName;
86 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
91 //**********************************************************************************************************************
92 UnifracUnweightedCommand::UnifracUnweightedCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["unweighted"] = tempOutNames;
98 outputTypes["uwsummary"] = tempOutNames;
99 outputTypes["phylip"] = tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["tree"] = tempOutNames;
103 catch(exception& e) {
104 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
108 /***********************************************************/
109 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
111 abort = false; calledHelp = false;
114 //allow user to run help
115 if(option == "help") { help(); abort = true; calledHelp = true; }
116 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119 vector<string> myArray = setParameters();
121 OptionParser parser(option);
122 map<string,string> parameters = parser.getParameters();
123 map<string,string>::iterator it;
125 ValidParameters validParameter;
127 //check to make sure all parameters are valid for command
128 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
132 //initialize outputTypes
133 vector<string> tempOutNames;
134 outputTypes["unweighted"] = tempOutNames;
135 outputTypes["uwsummary"] = tempOutNames;
136 outputTypes["phylip"] = tempOutNames;
137 outputTypes["column"] = tempOutNames;
138 outputTypes["tree"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("tree");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["tree"] = inputDir + it->second; }
153 it = parameters.find("group");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["group"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
170 //check for required parameters
171 treefile = validParameter.validFile(parameters, "tree", true);
172 if (treefile == "not open") { abort = true; }
173 else if (treefile == "not found") { //if there is a current design file, use it
174 treefile = m->getTreeFile();
175 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
176 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
177 }else { m->setTreeFile(treefile); }
179 //check for required parameters
180 groupfile = validParameter.validFile(parameters, "group", true);
181 if (groupfile == "not open") { abort = true; }
182 else if (groupfile == "not found") { groupfile = ""; }
183 else { m->setGroupFile(groupfile); }
185 namefile = validParameter.validFile(parameters, "name", true);
186 if (namefile == "not open") { namefile = ""; abort = true; }
187 else if (namefile == "not found") { namefile = ""; }
188 else { m->setNameFile(namefile); }
190 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
192 //check for optional parameter and set defaults
193 // ...at some point should added some additional type checking...
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 m->setGroups(Groups);
201 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
202 m->mothurConvert(itersString, iters);
204 string temp = validParameter.validFile(parameters, "distance", false);
205 if (temp == "not found") { phylip = false; outputForm = ""; }
207 if (temp=="phylip") { temp = "lt"; }
208 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
209 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
212 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
213 random = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
216 includeRoot = m->isTrue(temp);
218 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
219 m->setProcessors(temp);
220 m->mothurConvert(temp, processors);
222 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
223 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
225 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
226 else { subsample = false; }
229 if (!subsample) { subsampleIters = 0; }
230 else { subsampleIters = iters; }
232 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
233 consensus = m->isTrue(temp);
235 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
236 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
237 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
238 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
240 if (!random) { iters = 0; } //turn off random calcs
242 //if user selects distance = true and no groups it won't calc the pairwise
243 if ((phylip) && (Groups.size() == 0)) {
245 m->splitAtDash(groups, Groups);
246 m->setGroups(Groups);
249 if (namefile == "") {
250 vector<string> files; files.push_back(treefile);
251 parser.getNameFile(files);
256 catch(exception& e) {
257 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
262 /***********************************************************/
263 int UnifracUnweightedCommand::execute() {
266 if (abort == true) { if (calledHelp) { return 0; } return 2; }
268 m->setTreeFile(treefile);
270 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
271 T = reader->getTrees();
272 tmap = T[0]->getTreeMap();
273 map<string, string> nameMap = reader->getNames();
274 map<string, string> unique2Dup = reader->getNameMap();
277 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
278 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
279 m->openOutputFile(sumFile, outSum);
282 Groups = m->getGroups();
283 vector<string> namesGroups = tmap->getNamesOfGroups();
284 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
286 Unweighted unweighted(includeRoot);
288 int start = time(NULL);
292 //user has not set size, set size = smallest samples size
293 if (subsampleSize == -1) {
294 vector<string> temp; temp.push_back(Groups[0]);
295 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
296 for (int i = 1; i < Groups.size(); i++) {
297 temp.clear(); temp.push_back(Groups[i]);
298 int thisSize = (tmap->getNamesSeqs(temp)).size();
299 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
301 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
302 }else { //eliminate any too small groups
303 vector<string> newGroups = Groups;
305 for (int i = 0; i < newGroups.size(); i++) {
306 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
307 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
308 int thisSize = thisGroupsSeqs.size();
310 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
311 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
313 m->setGroups(Groups);
317 util.getCombos(groupComb, Groups, numComp);
318 m->setGroups(Groups);
320 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
322 if (numComp < processors) { processors = numComp; }
324 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
326 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
327 m->mothurOut("Tree#\tGroups\tUWScore\t");
328 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
329 outSum << endl; m->mothurOutEndLine();
331 //get pscores for users trees
332 for (int i = 0; i < T.size(); i++) {
333 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
338 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
339 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
340 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
344 //get unweighted for users tree
345 rscoreFreq.resize(numComp);
346 rCumul.resize(numComp);
347 utreeScores.resize(numComp);
348 UWScoreSig.resize(numComp);
350 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
352 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
354 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
356 //output scores for each combination
357 for(int k = 0; k < numComp; k++) {
359 utreeScores[k].push_back(userData[k]);
361 //add users score to validscores
362 validScores[userData[k]] = userData[k];
364 if (!random) { UWScoreSig[k].push_back(0.0); }
367 if (random) { runRandomCalcs(T[i], userData); }
369 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
371 int startSubsample = time(NULL);
374 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
375 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
376 if (m->control_pressed) { break; }
378 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
379 TreeMap* newTmap = new TreeMap();
380 //newTmap->getCopy(*tmap);
383 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
385 //uses method of setting groups to doNotIncludeMe
387 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
389 //call new weighted function
390 vector<double> iterData; iterData.resize(numComp,0);
391 Unweighted thisUnweighted(includeRoot);
392 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
394 //save data to make ave dist, std dist
395 calcDistsTotals.push_back(iterData);
398 delete subSampleTree;
400 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
402 m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
404 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
406 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
407 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
410 printUWSummaryFile(i);
411 if (random) { printUnweightedFile(); delete output; }
412 if (phylip) { createPhylipFile(i); }
424 for (int i = 0; i < T.size(); i++) { delete T[i]; }
426 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
428 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
430 //set phylip file as new current phylipfile
432 itTypes = outputTypes.find("phylip");
433 if (itTypes != outputTypes.end()) {
434 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
437 //set column file as new current columnfile
438 itTypes = outputTypes.find("column");
439 if (itTypes != outputTypes.end()) {
440 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
443 m->mothurOutEndLine();
444 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
445 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
446 m->mothurOutEndLine();
451 catch(exception& e) {
452 m->errorOut(e, "UnifracUnweightedCommand", "execute");
456 /**************************************************************************************************/
457 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
459 //we need to find the average distance and standard deviation for each groups distance
462 vector<double> averages; averages.resize(numComp, 0);
463 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
464 for (int i = 0; i < dists[thisIter].size(); i++) {
465 averages[i] += dists[thisIter][i];
470 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
472 //find standard deviation
473 vector<double> stdDev; stdDev.resize(numComp, 0);
475 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
476 for (int j = 0; j < dists[thisIter].size(); j++) {
477 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
480 for (int i = 0; i < stdDev.size(); i++) {
481 stdDev[i] /= (float) subsampleIters;
482 stdDev[i] = sqrt(stdDev[i]);
485 //make matrix with scores in it
486 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
487 for (int i = 0; i < m->getNumGroups(); i++) {
488 avedists[i].resize(m->getNumGroups(), 0.0);
491 //make matrix with scores in it
492 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
493 for (int i = 0; i < m->getNumGroups(); i++) {
494 stddists[i].resize(m->getNumGroups(), 0.0);
497 //flip it so you can print it
499 for (int r=0; r<m->getNumGroups(); r++) {
500 for (int l = 0; l < r; l++) {
501 avedists[r][l] = averages[count];
502 avedists[l][r] = averages[count];
503 stddists[r][l] = stdDev[count];
504 stddists[l][r] = stdDev[count];
509 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
510 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
511 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
513 m->openOutputFile(aveFileName, out);
515 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
516 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
517 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
519 m->openOutputFile(stdFileName, outStd);
521 if ((outputForm == "lt") || (outputForm == "square")) {
523 out << m->getNumGroups() << endl;
524 outStd << m->getNumGroups() << endl;
528 for (int r=0; r<m->getNumGroups(); r++) {
530 string name = (m->getGroups())[r];
531 if (name.length() < 10) { //pad with spaces to make compatible
532 while (name.length() < 10) { name += " "; }
535 if (outputForm == "lt") {
537 outStd << name << '\t';
540 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
541 out << endl; outStd << endl;
542 }else if (outputForm == "square") {
544 outStd << name << '\t';
547 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
548 out << endl; outStd << endl;
551 for (int l = 0; l < r; l++) {
552 string otherName = (m->getGroups())[l];
553 if (otherName.length() < 10) { //pad with spaces to make compatible
554 while (otherName.length() < 10) { otherName += " "; }
557 out << name << '\t' << otherName << avedists[r][l] << endl;
558 outStd << name << '\t' << otherName << stddists[r][l] << endl;
567 catch(exception& e) {
568 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
573 /**************************************************************************************************/
574 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
577 //used in tree constructor
580 //create treemap class from groupmap for tree class to use
582 newTmap.makeSim(m->getGroups());
584 //clear old tree names if any
585 m->Treenames.clear();
587 //fills globaldatas tree names
588 m->Treenames = m->getGroups();
590 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
592 if (m->control_pressed) { return 0; }
595 Tree* conTree = con.getTree(newTrees);
597 //create a new filename
598 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");
599 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
601 m->openOutputFile(conFile, outTree);
603 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
609 catch(exception& e) {
610 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
614 /**************************************************************************************************/
616 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
621 //create a new filename
622 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");
623 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
626 m->openOutputFile(outputFile, outAll);
629 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
631 if (m->control_pressed) { break; }
633 //make matrix with scores in it
634 vector< vector<double> > sims; sims.resize(m->getNumGroups());
635 for (int j = 0; j < m->getNumGroups(); j++) {
636 sims[j].resize(m->getNumGroups(), 0.0);
640 for (int r=0; r<m->getNumGroups(); r++) {
641 for (int l = 0; l < r; l++) {
642 double sim = -(dists[i][count]-1.0);
650 Tree* tempTree = new Tree(&mytmap, sims);
651 map<string, string> empty;
652 tempTree->assembleTree(empty);
654 trees.push_back(tempTree);
657 tempTree->print(outAll);
662 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
666 catch(exception& e) {
667 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
671 /**************************************************************************************************/
673 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
675 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
677 Unweighted unweighted(includeRoot);
679 //get unweighted scores for random trees - if random is false iters = 0
680 for (int j = 0; j < iters; j++) {
682 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
683 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
685 if (m->control_pressed) { return 0; }
687 for(int k = 0; k < numComp; k++) {
688 //add trees unweighted score to map of scores
689 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
690 if (it != rscoreFreq[k].end()) {//already have that score
691 rscoreFreq[k][randomData[k]]++;
692 }else{//first time we have seen this score
693 rscoreFreq[k][randomData[k]] = 1;
696 //add randoms score to validscores
697 validScores[randomData[k]] = randomData[k];
701 for(int a = 0; a < numComp; a++) {
702 float rcumul = 1.0000;
704 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
705 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
706 //make rscoreFreq map and rCumul
707 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
708 rCumul[a][it->first] = rcumul;
709 //get percentage of random trees with that info
710 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
711 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
713 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
718 catch(exception& e) {
719 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
723 /***********************************************************/
724 void UnifracUnweightedCommand::printUnweightedFile() {
729 tags.push_back("Score");
730 tags.push_back("RandFreq"); tags.push_back("RandCumul");
732 for(int a = 0; a < numComp; a++) {
733 output->initFile(groupComb[a], tags);
735 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
736 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
737 output->output(data);
743 catch(exception& e) {
744 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
749 /***********************************************************/
750 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
754 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
758 for(int a = 0; a < numComp; a++) {
759 outSum << i+1 << '\t';
760 m->mothurOut(toString(i+1) + "\t");
763 if (UWScoreSig[a][0] > (1/(float)iters)) {
764 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
765 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
766 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
768 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
769 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
770 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
773 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
774 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
775 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
780 catch(exception& e) {
781 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
785 /***********************************************************/
786 void UnifracUnweightedCommand::createPhylipFile(int i) {
788 string phylipFileName;
789 if ((outputForm == "lt") || (outputForm == "square")) {
790 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
791 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
793 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
794 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
798 m->openOutputFile(phylipFileName, out);
800 if ((outputForm == "lt") || (outputForm == "square")) {
802 out << m->getNumGroups() << endl;
805 //make matrix with scores in it
806 vector< vector<float> > dists; dists.resize(m->getNumGroups());
807 for (int i = 0; i < m->getNumGroups(); i++) {
808 dists[i].resize(m->getNumGroups(), 0.0);
811 //flip it so you can print it
813 for (int r=0; r<m->getNumGroups(); r++) {
814 for (int l = 0; l < r; l++) {
815 dists[r][l] = utreeScores[count][0];
816 dists[l][r] = utreeScores[count][0];
822 for (int r=0; r<m->getNumGroups(); r++) {
824 string name = (m->getGroups())[r];
825 if (name.length() < 10) { //pad with spaces to make compatible
826 while (name.length() < 10) { name += " "; }
829 if (outputForm == "lt") {
833 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
835 }else if (outputForm == "square") {
839 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
843 for (int l = 0; l < r; l++) {
844 string otherName = (m->getGroups())[l];
845 if (otherName.length() < 10) { //pad with spaces to make compatible
846 while (otherName.length() < 10) { otherName += " "; }
849 out << name << '\t' << otherName << dists[r][l] << endl;
855 catch(exception& e) {
856 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
860 /***********************************************************/