2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","unweighted-uwsummary",false,true,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
26 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
27 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
28 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputPattern(string type) {
71 if (type == "unweighted") { pattern = "[filename],unweighted-[filename],[tag],unweighted"; }
72 else if (type == "uwsummary") { pattern = "[filename],uwsummary"; }
73 else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
74 else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
75 else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "UnifracUnweightedCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 UnifracUnweightedCommand::UnifracUnweightedCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["unweighted"] = tempOutNames;
92 outputTypes["uwsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
95 outputTypes["tree"] = tempOutNames;
98 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
102 /***********************************************************/
103 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
105 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
117 map<string,string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["unweighted"] = tempOutNames;
129 outputTypes["uwsummary"] = tempOutNames;
130 outputTypes["phylip"] = tempOutNames;
131 outputTypes["column"] = tempOutNames;
132 outputTypes["tree"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("tree");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["tree"] = inputDir + it->second; }
147 it = parameters.find("group");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["group"] = inputDir + it->second; }
155 it = parameters.find("name");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["name"] = inputDir + it->second; }
163 it = parameters.find("count");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["count"] = inputDir + it->second; }
172 //check for required parameters
173 treefile = validParameter.validFile(parameters, "tree", true);
174 if (treefile == "not open") { abort = true; }
175 else if (treefile == "not found") { //if there is a current design file, use it
176 treefile = m->getTreeFile();
177 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else { m->setTreeFile(treefile); }
181 //check for required parameters
182 groupfile = validParameter.validFile(parameters, "group", true);
183 if (groupfile == "not open") { abort = true; }
184 else if (groupfile == "not found") { groupfile = ""; }
185 else { m->setGroupFile(groupfile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { namefile = ""; abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 countfile = validParameter.validFile(parameters, "count", true);
193 if (countfile == "not open") { countfile = ""; abort = true; }
194 else if (countfile == "not found") { countfile = ""; }
195 else { m->setCountTableFile(countfile); }
197 if ((namefile != "") && (countfile != "")) {
198 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
201 if ((groupfile != "") && (countfile != "")) {
202 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
205 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
207 //check for optional parameter and set defaults
208 // ...at some point should added some additional type checking...
209 groups = validParameter.validFile(parameters, "groups", false);
210 if (groups == "not found") { groups = ""; }
212 m->splitAtDash(groups, Groups);
213 m->setGroups(Groups);
216 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
217 m->mothurConvert(itersString, iters);
219 string temp = validParameter.validFile(parameters, "distance", false);
220 if (temp == "not found") { phylip = false; outputForm = ""; }
222 if (temp=="phylip") { temp = "lt"; }
223 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
224 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
227 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
228 random = m->isTrue(temp);
230 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
231 includeRoot = m->isTrue(temp);
233 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
234 m->setProcessors(temp);
235 m->mothurConvert(temp, processors);
237 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
238 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
240 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
241 else { subsample = false; }
244 if (!subsample) { subsampleIters = 0; }
245 else { subsampleIters = iters; }
247 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
248 consensus = m->isTrue(temp);
250 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
251 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
254 if ((!testCt.testGroups(countfile)) && (subsample)) {
255 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
258 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
259 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
261 if (!random) { iters = 0; } //turn off random calcs
263 //if user selects distance = true and no groups it won't calc the pairwise
264 if ((phylip) && (Groups.size() == 0)) {
266 m->splitAtDash(groups, Groups);
267 m->setGroups(Groups);
271 if (namefile == "") {
272 vector<string> files; files.push_back(treefile);
273 parser.getNameFile(files);
279 catch(exception& e) {
280 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
285 /***********************************************************/
286 int UnifracUnweightedCommand::execute() {
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 m->setTreeFile(treefile);
294 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
295 else { reader = new TreeReader(treefile, countfile); }
296 T = reader->getTrees();
297 ct = T[0]->getCountTable();
300 map<string, string> variables;
301 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
302 sumFile = getOutputFileName("uwsummary",variables);
303 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
304 m->openOutputFile(sumFile, outSum);
307 Groups = m->getGroups();
308 vector<string> namesGroups = ct->getNamesOfGroups();
309 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
311 Unweighted unweighted(includeRoot);
313 int start = time(NULL);
317 //user has not set size, set size = smallest samples size
318 if (subsampleSize == -1) {
319 vector<string> temp; temp.push_back(Groups[0]);
320 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
321 for (int i = 1; i < Groups.size(); i++) {
322 int thisSize = ct->getGroupCount(Groups[i]);
323 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
325 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
326 }else { //eliminate any too small groups
327 vector<string> newGroups = Groups;
329 for (int i = 0; i < newGroups.size(); i++) {
330 int thisSize = ct->getGroupCount(newGroups[i]);
332 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
333 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
335 m->setGroups(Groups);
339 util.getCombos(groupComb, Groups, numComp);
340 m->setGroups(Groups);
342 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
344 if (numComp < processors) { processors = numComp; }
346 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
348 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
349 m->mothurOut("Tree#\tGroups\tUWScore\t");
350 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
351 outSum << endl; m->mothurOutEndLine();
353 //get pscores for users trees
354 for (int i = 0; i < T.size(); i++) {
355 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
360 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
361 variables["[tag]"] = toString(i+1);
362 string unFileName = getOutputFileName("unweighted", variables);
363 output = new ColumnFile(unFileName, itersString);
364 outputNames.push_back(unFileName); outputTypes["unweighted"].push_back(unFileName);
368 //get unweighted for users tree
369 rscoreFreq.resize(numComp);
370 rCumul.resize(numComp);
371 utreeScores.resize(numComp);
372 UWScoreSig.resize(numComp);
374 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
376 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
378 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
380 //output scores for each combination
381 for(int k = 0; k < numComp; k++) {
383 utreeScores[k].push_back(userData[k]);
385 //add users score to validscores
386 validScores[userData[k]] = userData[k];
388 if (!random) { UWScoreSig[k].push_back(0.0); }
391 if (random) { runRandomCalcs(T[i], userData); }
393 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
395 int startSubsample = time(NULL);
398 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
399 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
400 if (m->control_pressed) { break; }
402 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
403 CountTable* newCt = new CountTable();
405 //uses method of setting groups to doNotIncludeMe
407 if (m->debug) { sampleTime = time(NULL); }
409 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
411 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
413 //call new weighted function
414 vector<double> iterData; iterData.resize(numComp,0);
415 Unweighted thisUnweighted(includeRoot);
416 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
418 //save data to make ave dist, std dist
419 calcDistsTotals.push_back(iterData);
422 delete subSampleTree;
424 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
426 if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
428 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
430 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
431 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
434 printUWSummaryFile(i);
435 if (random) { printUnweightedFile(); delete output; }
436 if (phylip) { createPhylipFile(i); }
448 for (int i = 0; i < T.size(); i++) { delete T[i]; }
450 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
452 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
454 //set phylip file as new current phylipfile
456 itTypes = outputTypes.find("phylip");
457 if (itTypes != outputTypes.end()) {
458 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
461 //set column file as new current columnfile
462 itTypes = outputTypes.find("column");
463 if (itTypes != outputTypes.end()) {
464 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
467 m->mothurOutEndLine();
468 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
469 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
470 m->mothurOutEndLine();
475 catch(exception& e) {
476 m->errorOut(e, "UnifracUnweightedCommand", "execute");
480 /**************************************************************************************************/
481 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
483 //we need to find the average distance and standard deviation for each groups distance
486 vector<double> averages; averages.resize(numComp, 0);
487 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
488 for (int i = 0; i < dists[thisIter].size(); i++) {
489 averages[i] += dists[thisIter][i];
494 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
496 //find standard deviation
497 vector<double> stdDev; stdDev.resize(numComp, 0);
499 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
500 for (int j = 0; j < dists[thisIter].size(); j++) {
501 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
504 for (int i = 0; i < stdDev.size(); i++) {
505 stdDev[i] /= (float) subsampleIters;
506 stdDev[i] = sqrt(stdDev[i]);
509 //make matrix with scores in it
510 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
511 for (int i = 0; i < m->getNumGroups(); i++) {
512 avedists[i].resize(m->getNumGroups(), 0.0);
515 //make matrix with scores in it
516 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
517 for (int i = 0; i < m->getNumGroups(); i++) {
518 stddists[i].resize(m->getNumGroups(), 0.0);
521 //flip it so you can print it
523 for (int r=0; r<m->getNumGroups(); r++) {
524 for (int l = 0; l < r; l++) {
525 avedists[r][l] = averages[count];
526 avedists[l][r] = averages[count];
527 stddists[r][l] = stdDev[count];
528 stddists[l][r] = stdDev[count];
533 map<string, string> variables;
534 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
535 variables["[tag]"] = toString(treeNum+1);
536 variables["[tag2]"] = "unweighted.ave";
537 string aveFileName = getOutputFileName("phylip",variables);
538 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
539 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
541 m->openOutputFile(aveFileName, out);
543 variables["[tag2]"] = "unweighted.std";
544 string stdFileName = getOutputFileName("phylip",variables);
545 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
546 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
548 m->openOutputFile(stdFileName, outStd);
550 if ((outputForm == "lt") || (outputForm == "square")) {
552 out << m->getNumGroups() << endl;
553 outStd << m->getNumGroups() << endl;
557 for (int r=0; r<m->getNumGroups(); r++) {
559 string name = (m->getGroups())[r];
560 if (name.length() < 10) { //pad with spaces to make compatible
561 while (name.length() < 10) { name += " "; }
564 if (outputForm == "lt") {
566 outStd << name << '\t';
569 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
570 out << endl; outStd << endl;
571 }else if (outputForm == "square") {
573 outStd << name << '\t';
576 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
577 out << endl; outStd << endl;
580 for (int l = 0; l < r; l++) {
581 string otherName = (m->getGroups())[l];
582 if (otherName.length() < 10) { //pad with spaces to make compatible
583 while (otherName.length() < 10) { otherName += " "; }
586 out << name << '\t' << otherName << avedists[r][l] << endl;
587 outStd << name << '\t' << otherName << stddists[r][l] << endl;
596 catch(exception& e) {
597 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
602 /**************************************************************************************************/
603 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
606 //used in tree constructor
609 //create treemap class from groupmap for tree class to use
612 map<string, string> groupMap;
614 for (int i = 0; i < m->getGroups().size(); i++) {
615 nameMap.insert(m->getGroups()[i]);
616 gps.insert(m->getGroups()[i]);
617 groupMap[m->getGroups()[i]] = m->getGroups()[i];
619 newCt.createTable(nameMap, groupMap, gps);
621 //clear old tree names if any
622 m->Treenames.clear();
624 //fills globaldatas tree names
625 m->Treenames = m->getGroups();
627 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
629 if (m->control_pressed) { return 0; }
632 Tree* conTree = con.getTree(newTrees);
634 //create a new filename
635 map<string, string> variables;
636 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
637 variables["[tag]"] = toString(treeNum+1);
638 variables["[tag2]"] = "unweighted.cons";
639 string conFile = getOutputFileName("tree",variables);
640 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
642 m->openOutputFile(conFile, outTree);
644 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
650 catch(exception& e) {
651 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
655 /**************************************************************************************************/
657 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
662 //create a new filename
663 map<string, string> variables;
664 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
665 variables["[tag]"] = toString(treeNum+1);
666 variables["[tag2]"] = "unweighted.all";
667 string outputFile = getOutputFileName("tree",variables);
668 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
671 m->openOutputFile(outputFile, outAll);
674 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
676 if (m->control_pressed) { break; }
678 //make matrix with scores in it
679 vector< vector<double> > sims; sims.resize(m->getNumGroups());
680 for (int j = 0; j < m->getNumGroups(); j++) {
681 sims[j].resize(m->getNumGroups(), 0.0);
685 for (int r=0; r<m->getNumGroups(); r++) {
686 for (int l = 0; l < r; l++) {
687 double sim = -(dists[i][count]-1.0);
695 Tree* tempTree = new Tree(&myct, sims);
696 tempTree->assembleTree();
698 trees.push_back(tempTree);
701 tempTree->print(outAll);
706 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
710 catch(exception& e) {
711 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
715 /**************************************************************************************************/
717 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
719 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
721 Unweighted unweighted(includeRoot);
723 //get unweighted scores for random trees - if random is false iters = 0
724 for (int j = 0; j < iters; j++) {
726 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
727 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
729 if (m->control_pressed) { return 0; }
731 for(int k = 0; k < numComp; k++) {
732 //add trees unweighted score to map of scores
733 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
734 if (it != rscoreFreq[k].end()) {//already have that score
735 rscoreFreq[k][randomData[k]]++;
736 }else{//first time we have seen this score
737 rscoreFreq[k][randomData[k]] = 1;
740 //add randoms score to validscores
741 validScores[randomData[k]] = randomData[k];
745 for(int a = 0; a < numComp; a++) {
746 float rcumul = 1.0000;
748 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
749 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
750 //make rscoreFreq map and rCumul
751 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
752 rCumul[a][it->first] = rcumul;
753 //get percentage of random trees with that info
754 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
755 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
757 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
762 catch(exception& e) {
763 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
767 /***********************************************************/
768 void UnifracUnweightedCommand::printUnweightedFile() {
773 tags.push_back("Score");
774 tags.push_back("RandFreq"); tags.push_back("RandCumul");
776 for(int a = 0; a < numComp; a++) {
777 output->initFile(groupComb[a], tags);
779 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
780 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
781 output->output(data);
787 catch(exception& e) {
788 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
793 /***********************************************************/
794 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
798 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
802 for(int a = 0; a < numComp; a++) {
803 outSum << i+1 << '\t';
804 m->mothurOut(toString(i+1) + "\t");
807 if (UWScoreSig[a][0] > (1/(float)iters)) {
808 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
809 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
810 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
812 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
813 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
814 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
817 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
818 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
819 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
824 catch(exception& e) {
825 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
829 /***********************************************************/
830 void UnifracUnweightedCommand::createPhylipFile(int i) {
832 string phylipFileName;
833 map<string, string> variables;
834 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
835 variables["[tag]"] = toString(i+1);
836 if ((outputForm == "lt") || (outputForm == "square")) {
837 variables["[tag2]"] = "unweighted.phylip";
838 phylipFileName = getOutputFileName("phylip",variables);
839 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
841 variables["[tag2]"] = "unweighted.column";
842 phylipFileName = getOutputFileName("column",variables);
843 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
847 m->openOutputFile(phylipFileName, out);
849 if ((outputForm == "lt") || (outputForm == "square")) {
851 out << m->getNumGroups() << endl;
854 //make matrix with scores in it
855 vector< vector<float> > dists; dists.resize(m->getNumGroups());
856 for (int i = 0; i < m->getNumGroups(); i++) {
857 dists[i].resize(m->getNumGroups(), 0.0);
860 //flip it so you can print it
862 for (int r=0; r<m->getNumGroups(); r++) {
863 for (int l = 0; l < r; l++) {
864 dists[r][l] = utreeScores[count][0];
865 dists[l][r] = utreeScores[count][0];
871 for (int r=0; r<m->getNumGroups(); r++) {
873 string name = (m->getGroups())[r];
874 if (name.length() < 10) { //pad with spaces to make compatible
875 while (name.length() < 10) { name += " "; }
878 if (outputForm == "lt") {
882 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
884 }else if (outputForm == "square") {
888 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
892 for (int l = 0; l < r; l++) {
893 string otherName = (m->getGroups())[l];
894 if (otherName.length() < 10) { //pad with spaces to make compatible
895 while (otherName.length() < 10) { otherName += " "; }
898 out << name << '\t' << otherName << dists[r][l] << endl;
904 catch(exception& e) {
905 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
909 /***********************************************************/