2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 UnifracUnweightedCommand::UnifracUnweightedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["unweighted"] = tempOutNames;
73 outputTypes["uwsummary"] = tempOutNames;
74 outputTypes["phylip"] = tempOutNames;
75 outputTypes["column"] = tempOutNames;
76 outputTypes["tree"] = tempOutNames;
79 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
83 /***********************************************************/
84 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
86 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
98 map<string,string>::iterator it;
100 ValidParameters validParameter;
102 //check to make sure all parameters are valid for command
103 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["unweighted"] = tempOutNames;
110 outputTypes["uwsummary"] = tempOutNames;
111 outputTypes["phylip"] = tempOutNames;
112 outputTypes["column"] = tempOutNames;
113 outputTypes["tree"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("tree");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { abort = true; }
148 else if (treefile == "not found") { //if there is a current design file, use it
149 treefile = m->getTreeFile();
150 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setTreeFile(treefile); }
154 //check for required parameters
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { m->setGroupFile(groupfile); }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { namefile = ""; abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
163 else { m->setNameFile(namefile); }
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
167 //check for optional parameter and set defaults
168 // ...at some point should added some additional type checking...
169 groups = validParameter.validFile(parameters, "groups", false);
170 if (groups == "not found") { groups = ""; }
172 m->splitAtDash(groups, Groups);
173 m->setGroups(Groups);
176 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
177 m->mothurConvert(itersString, iters);
179 string temp = validParameter.validFile(parameters, "distance", false);
180 if (temp == "not found") { phylip = false; outputForm = ""; }
182 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
183 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
186 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
187 random = m->isTrue(temp);
189 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
190 includeRoot = m->isTrue(temp);
192 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
193 m->setProcessors(temp);
194 m->mothurConvert(temp, processors);
196 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
197 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
199 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
200 else { subsample = false; }
203 if (!subsample) { subsampleIters = 0; }
204 else { subsampleIters = iters; }
206 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
207 consensus = m->isTrue(temp);
209 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
210 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
211 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
212 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
214 if (!random) { iters = 0; } //turn off random calcs
216 //if user selects distance = true and no groups it won't calc the pairwise
217 if ((phylip) && (Groups.size() == 0)) {
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 if (namefile == "") {
224 vector<string> files; files.push_back(treefile);
225 parser.getNameFile(files);
230 catch(exception& e) {
231 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
236 /***********************************************************/
237 int UnifracUnweightedCommand::execute() {
240 if (abort == true) { if (calledHelp) { return 0; } return 2; }
242 m->setTreeFile(treefile);
244 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
245 T = reader->getTrees();
246 tmap = T[0]->getTreeMap();
247 map<string, string> nameMap = reader->getNames();
250 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
251 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
252 m->openOutputFile(sumFile, outSum);
255 Groups = m->getGroups();
256 vector<string> namesGroups = tmap->getNamesOfGroups();
257 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
259 Unweighted unweighted(includeRoot);
261 int start = time(NULL);
265 //user has not set size, set size = smallest samples size
266 if (subsampleSize == -1) {
267 vector<string> temp; temp.push_back(Groups[0]);
268 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
269 for (int i = 1; i < Groups.size(); i++) {
270 temp.clear(); temp.push_back(Groups[i]);
271 int thisSize = (tmap->getNamesSeqs(temp)).size();
272 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
274 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
275 }else { //eliminate any too small groups
276 vector<string> newGroups = Groups;
278 for (int i = 0; i < newGroups.size(); i++) {
279 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
280 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
281 int thisSize = thisGroupsSeqs.size();
283 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
284 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
286 m->setGroups(Groups);
290 util.getCombos(groupComb, Groups, numComp);
291 m->setGroups(Groups);
293 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
295 if (numComp < processors) { processors = numComp; }
297 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
299 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
300 m->mothurOut("Tree#\tGroups\tUWScore\t");
301 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
302 outSum << endl; m->mothurOutEndLine();
304 //get pscores for users trees
305 for (int i = 0; i < T.size(); i++) {
306 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
311 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString);
312 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
313 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
317 //get unweighted for users tree
318 rscoreFreq.resize(numComp);
319 rCumul.resize(numComp);
320 utreeScores.resize(numComp);
321 UWScoreSig.resize(numComp);
323 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
325 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
327 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
329 //output scores for each combination
330 for(int k = 0; k < numComp; k++) {
332 utreeScores[k].push_back(userData[k]);
334 //add users score to validscores
335 validScores[userData[k]] = userData[k];
337 if (!random) { UWScoreSig[k].push_back(0.0); }
340 if (random) { runRandomCalcs(T[i], userData); }
342 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
345 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
346 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
348 if (m->control_pressed) { break; }
350 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
351 TreeMap* newTmap = new TreeMap();
352 newTmap->getCopy(*tmap);
355 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
357 //call new weighted function
358 vector<double> iterData; iterData.resize(numComp,0);
359 Unweighted thisUnweighted(includeRoot);
360 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
362 //save data to make ave dist, std dist
363 calcDistsTotals.push_back(iterData);
366 delete subSampleTree;
368 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
371 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
373 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
374 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
377 printUWSummaryFile(i);
378 if (random) { printUnweightedFile(); delete output; }
379 if (phylip) { createPhylipFile(i); }
391 for (int i = 0; i < T.size(); i++) { delete T[i]; }
393 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
395 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
397 //set phylip file as new current phylipfile
399 itTypes = outputTypes.find("phylip");
400 if (itTypes != outputTypes.end()) {
401 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
404 //set column file as new current columnfile
405 itTypes = outputTypes.find("column");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
410 m->mothurOutEndLine();
411 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
412 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
413 m->mothurOutEndLine();
418 catch(exception& e) {
419 m->errorOut(e, "UnifracUnweightedCommand", "execute");
423 /**************************************************************************************************/
424 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
426 //we need to find the average distance and standard deviation for each groups distance
429 vector<double> averages; averages.resize(numComp, 0);
430 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
431 for (int i = 0; i < dists[thisIter].size(); i++) {
432 averages[i] += dists[thisIter][i];
437 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
439 //find standard deviation
440 vector<double> stdDev; stdDev.resize(numComp, 0);
442 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
443 for (int j = 0; j < dists[thisIter].size(); j++) {
444 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
447 for (int i = 0; i < stdDev.size(); i++) {
448 stdDev[i] /= (float) subsampleIters;
449 stdDev[i] = sqrt(stdDev[i]);
452 //make matrix with scores in it
453 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
454 for (int i = 0; i < m->getNumGroups(); i++) {
455 avedists[i].resize(m->getNumGroups(), 0.0);
458 //make matrix with scores in it
459 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
460 for (int i = 0; i < m->getNumGroups(); i++) {
461 stddists[i].resize(m->getNumGroups(), 0.0);
464 //flip it so you can print it
466 for (int r=0; r<m->getNumGroups(); r++) {
467 for (int l = 0; l < r; l++) {
468 avedists[r][l] = averages[count];
469 avedists[l][r] = averages[count];
470 stddists[r][l] = stdDev[count];
471 stddists[l][r] = stdDev[count];
476 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave.dist";
477 outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
480 m->openOutputFile(aveFileName, out);
482 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std.dist";
483 outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
486 m->openOutputFile(stdFileName, outStd);
488 if ((outputForm == "lt") || (outputForm == "square")) {
490 out << m->getNumGroups() << endl;
491 outStd << m->getNumGroups() << endl;
495 for (int r=0; r<m->getNumGroups(); r++) {
497 string name = (m->getGroups())[r];
498 if (name.length() < 10) { //pad with spaces to make compatible
499 while (name.length() < 10) { name += " "; }
502 if (outputForm == "lt") {
504 outStd << name << '\t';
507 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
508 out << endl; outStd << endl;
509 }else if (outputForm == "square") {
511 outStd << name << '\t';
514 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
515 out << endl; outStd << endl;
518 for (int l = 0; l < r; l++) {
519 string otherName = (m->getGroups())[l];
520 if (otherName.length() < 10) { //pad with spaces to make compatible
521 while (otherName.length() < 10) { otherName += " "; }
524 out << name << '\t' << otherName << avedists[r][l] << endl;
525 outStd << name << '\t' << otherName << stddists[r][l] << endl;
534 catch(exception& e) {
535 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
540 /**************************************************************************************************/
541 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
544 //used in tree constructor
547 //create treemap class from groupmap for tree class to use
549 newTmap.makeSim(m->getGroups());
551 //clear old tree names if any
552 m->Treenames.clear();
554 //fills globaldatas tree names
555 m->Treenames = m->getGroups();
557 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
559 if (m->control_pressed) { return 0; }
562 Tree* conTree = con.getTree(newTrees);
564 //create a new filename
565 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons.tre";
566 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
568 m->openOutputFile(conFile, outTree);
570 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
576 catch(exception& e) {
577 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
581 /**************************************************************************************************/
583 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
588 //create a new filename
589 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all.tre";
590 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
593 m->openOutputFile(outputFile, outAll);
596 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
598 if (m->control_pressed) { break; }
600 //make matrix with scores in it
601 vector< vector<double> > sims; sims.resize(m->getNumGroups());
602 for (int j = 0; j < m->getNumGroups(); j++) {
603 sims[j].resize(m->getNumGroups(), 0.0);
607 for (int r=0; r<m->getNumGroups(); r++) {
608 for (int l = 0; l < r; l++) {
609 double sim = -(dists[i][count]-1.0);
617 Tree* tempTree = new Tree(&mytmap, sims);
618 map<string, string> empty;
619 tempTree->assembleTree(empty);
621 trees.push_back(tempTree);
624 tempTree->print(outAll);
629 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
633 catch(exception& e) {
634 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
638 /**************************************************************************************************/
640 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
642 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
644 Unweighted unweighted(includeRoot);
646 //get unweighted scores for random trees - if random is false iters = 0
647 for (int j = 0; j < iters; j++) {
649 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
650 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
652 if (m->control_pressed) { return 0; }
654 for(int k = 0; k < numComp; k++) {
655 //add trees unweighted score to map of scores
656 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
657 if (it != rscoreFreq[k].end()) {//already have that score
658 rscoreFreq[k][randomData[k]]++;
659 }else{//first time we have seen this score
660 rscoreFreq[k][randomData[k]] = 1;
663 //add randoms score to validscores
664 validScores[randomData[k]] = randomData[k];
668 for(int a = 0; a < numComp; a++) {
669 float rcumul = 1.0000;
671 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
672 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
673 //make rscoreFreq map and rCumul
674 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
675 rCumul[a][it->first] = rcumul;
676 //get percentage of random trees with that info
677 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
678 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
680 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
685 catch(exception& e) {
686 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
690 /***********************************************************/
691 void UnifracUnweightedCommand::printUnweightedFile() {
696 tags.push_back("Score");
697 tags.push_back("RandFreq"); tags.push_back("RandCumul");
699 for(int a = 0; a < numComp; a++) {
700 output->initFile(groupComb[a], tags);
702 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
703 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
704 output->output(data);
710 catch(exception& e) {
711 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
716 /***********************************************************/
717 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
721 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
725 for(int a = 0; a < numComp; a++) {
726 outSum << i+1 << '\t';
727 m->mothurOut(toString(i+1) + "\t");
730 if (UWScoreSig[a][0] > (1/(float)iters)) {
731 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
732 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
733 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
735 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
736 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
737 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
740 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
741 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
742 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
747 catch(exception& e) {
748 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
752 /***********************************************************/
753 void UnifracUnweightedCommand::createPhylipFile(int i) {
755 string phylipFileName;
756 if ((outputForm == "lt") || (outputForm == "square")) {
757 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist";
758 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
760 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist";
761 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
765 m->openOutputFile(phylipFileName, out);
767 if ((outputForm == "lt") || (outputForm == "square")) {
769 out << m->getNumGroups() << endl;
772 //make matrix with scores in it
773 vector< vector<float> > dists; dists.resize(m->getNumGroups());
774 for (int i = 0; i < m->getNumGroups(); i++) {
775 dists[i].resize(m->getNumGroups(), 0.0);
778 //flip it so you can print it
780 for (int r=0; r<m->getNumGroups(); r++) {
781 for (int l = 0; l < r; l++) {
782 dists[r][l] = utreeScores[count][0];
783 dists[l][r] = utreeScores[count][0];
789 for (int r=0; r<m->getNumGroups(); r++) {
791 string name = (m->getGroups())[r];
792 if (name.length() < 10) { //pad with spaces to make compatible
793 while (name.length() < 10) { name += " "; }
796 if (outputForm == "lt") {
800 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
802 }else if (outputForm == "square") {
806 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
810 for (int l = 0; l < r; l++) {
811 string otherName = (m->getGroups())[l];
812 if (otherName.length() < 10) { //pad with spaces to make compatible
813 while (otherName.length() < 10) { otherName += " "; }
816 out << name << '\t' << otherName << dists[r][l] << endl;
822 catch(exception& e) {
823 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
827 /***********************************************************/