2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
25 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
26 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
27 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracUnweightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){
69 string outputFileName = "";
70 map<string, vector<string> >::iterator it;
72 //is this a type this command creates
73 it = outputTypes.find(type);
74 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76 if (type == "unweighted") { outputFileName = "unweighted"; }
77 else if (type == "uwsummary") { outputFileName = "uwsummary"; }
78 else if (type == "phylip") { outputFileName = "dist"; }
79 else if (type == "column") { outputFileName = "dist"; }
80 else if (type == "tree") { outputFileName = "tre"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
83 return outputFileName;
86 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
91 //**********************************************************************************************************************
92 UnifracUnweightedCommand::UnifracUnweightedCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["unweighted"] = tempOutNames;
98 outputTypes["uwsummary"] = tempOutNames;
99 outputTypes["phylip"] = tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["tree"] = tempOutNames;
103 catch(exception& e) {
104 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
108 /***********************************************************/
109 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
111 abort = false; calledHelp = false;
114 //allow user to run help
115 if(option == "help") { help(); abort = true; calledHelp = true; }
116 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119 vector<string> myArray = setParameters();
121 OptionParser parser(option);
122 map<string,string> parameters = parser.getParameters();
123 map<string,string>::iterator it;
125 ValidParameters validParameter;
127 //check to make sure all parameters are valid for command
128 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
132 //initialize outputTypes
133 vector<string> tempOutNames;
134 outputTypes["unweighted"] = tempOutNames;
135 outputTypes["uwsummary"] = tempOutNames;
136 outputTypes["phylip"] = tempOutNames;
137 outputTypes["column"] = tempOutNames;
138 outputTypes["tree"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("tree");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["tree"] = inputDir + it->second; }
153 it = parameters.find("group");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["group"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
170 //check for required parameters
171 treefile = validParameter.validFile(parameters, "tree", true);
172 if (treefile == "not open") { abort = true; }
173 else if (treefile == "not found") { //if there is a current design file, use it
174 treefile = m->getTreeFile();
175 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
176 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
177 }else { m->setTreeFile(treefile); }
179 //check for required parameters
180 groupfile = validParameter.validFile(parameters, "group", true);
181 if (groupfile == "not open") { abort = true; }
182 else if (groupfile == "not found") { groupfile = ""; }
183 else { m->setGroupFile(groupfile); }
185 namefile = validParameter.validFile(parameters, "name", true);
186 if (namefile == "not open") { namefile = ""; abort = true; }
187 else if (namefile == "not found") { namefile = ""; }
188 else { m->setNameFile(namefile); }
190 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
192 //check for optional parameter and set defaults
193 // ...at some point should added some additional type checking...
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 m->setGroups(Groups);
201 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
202 m->mothurConvert(itersString, iters);
204 string temp = validParameter.validFile(parameters, "distance", false);
205 if (temp == "not found") { phylip = false; outputForm = ""; }
207 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
208 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
211 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
212 random = m->isTrue(temp);
214 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
215 includeRoot = m->isTrue(temp);
217 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
218 m->setProcessors(temp);
219 m->mothurConvert(temp, processors);
221 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
222 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
224 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
225 else { subsample = false; }
228 if (!subsample) { subsampleIters = 0; }
229 else { subsampleIters = iters; }
231 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
232 consensus = m->isTrue(temp);
234 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
235 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
236 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
237 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
239 if (!random) { iters = 0; } //turn off random calcs
241 //if user selects distance = true and no groups it won't calc the pairwise
242 if ((phylip) && (Groups.size() == 0)) {
244 m->splitAtDash(groups, Groups);
245 m->setGroups(Groups);
248 if (namefile == "") {
249 vector<string> files; files.push_back(treefile);
250 parser.getNameFile(files);
255 catch(exception& e) {
256 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
261 /***********************************************************/
262 int UnifracUnweightedCommand::execute() {
265 if (abort == true) { if (calledHelp) { return 0; } return 2; }
267 m->setTreeFile(treefile);
269 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
270 T = reader->getTrees();
271 tmap = T[0]->getTreeMap();
272 map<string, string> nameMap = reader->getNames();
273 map<string, string> unique2Dup = reader->getNameMap();
276 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
277 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
278 m->openOutputFile(sumFile, outSum);
281 Groups = m->getGroups();
282 vector<string> namesGroups = tmap->getNamesOfGroups();
283 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
285 Unweighted unweighted(includeRoot);
287 int start = time(NULL);
291 //user has not set size, set size = smallest samples size
292 if (subsampleSize == -1) {
293 vector<string> temp; temp.push_back(Groups[0]);
294 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
295 for (int i = 1; i < Groups.size(); i++) {
296 temp.clear(); temp.push_back(Groups[i]);
297 int thisSize = (tmap->getNamesSeqs(temp)).size();
298 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
300 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
301 }else { //eliminate any too small groups
302 vector<string> newGroups = Groups;
304 for (int i = 0; i < newGroups.size(); i++) {
305 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
306 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
307 int thisSize = thisGroupsSeqs.size();
309 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
310 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
312 m->setGroups(Groups);
316 util.getCombos(groupComb, Groups, numComp);
317 m->setGroups(Groups);
319 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
321 if (numComp < processors) { processors = numComp; }
323 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
325 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
326 m->mothurOut("Tree#\tGroups\tUWScore\t");
327 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
328 outSum << endl; m->mothurOutEndLine();
330 //get pscores for users trees
331 for (int i = 0; i < T.size(); i++) {
332 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
337 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
338 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
339 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
343 //get unweighted for users tree
344 rscoreFreq.resize(numComp);
345 rCumul.resize(numComp);
346 utreeScores.resize(numComp);
347 UWScoreSig.resize(numComp);
349 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
351 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
353 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
355 //output scores for each combination
356 for(int k = 0; k < numComp; k++) {
358 utreeScores[k].push_back(userData[k]);
360 //add users score to validscores
361 validScores[userData[k]] = userData[k];
363 if (!random) { UWScoreSig[k].push_back(0.0); }
366 if (random) { runRandomCalcs(T[i], userData); }
368 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
370 int startSubsample = time(NULL);
373 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
374 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
375 if (m->control_pressed) { break; }
377 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
378 TreeMap* newTmap = new TreeMap();
379 //newTmap->getCopy(*tmap);
382 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
384 //uses method of setting groups to doNotIncludeMe
386 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
388 //call new weighted function
389 vector<double> iterData; iterData.resize(numComp,0);
390 Unweighted thisUnweighted(includeRoot);
391 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
393 //save data to make ave dist, std dist
394 calcDistsTotals.push_back(iterData);
397 delete subSampleTree;
399 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
401 m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
403 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
405 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
406 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
409 printUWSummaryFile(i);
410 if (random) { printUnweightedFile(); delete output; }
411 if (phylip) { createPhylipFile(i); }
423 for (int i = 0; i < T.size(); i++) { delete T[i]; }
425 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
427 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
429 //set phylip file as new current phylipfile
431 itTypes = outputTypes.find("phylip");
432 if (itTypes != outputTypes.end()) {
433 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
436 //set column file as new current columnfile
437 itTypes = outputTypes.find("column");
438 if (itTypes != outputTypes.end()) {
439 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
442 m->mothurOutEndLine();
443 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
444 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
445 m->mothurOutEndLine();
450 catch(exception& e) {
451 m->errorOut(e, "UnifracUnweightedCommand", "execute");
455 /**************************************************************************************************/
456 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
458 //we need to find the average distance and standard deviation for each groups distance
461 vector<double> averages; averages.resize(numComp, 0);
462 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
463 for (int i = 0; i < dists[thisIter].size(); i++) {
464 averages[i] += dists[thisIter][i];
469 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
471 //find standard deviation
472 vector<double> stdDev; stdDev.resize(numComp, 0);
474 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
475 for (int j = 0; j < dists[thisIter].size(); j++) {
476 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
479 for (int i = 0; i < stdDev.size(); i++) {
480 stdDev[i] /= (float) subsampleIters;
481 stdDev[i] = sqrt(stdDev[i]);
484 //make matrix with scores in it
485 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
486 for (int i = 0; i < m->getNumGroups(); i++) {
487 avedists[i].resize(m->getNumGroups(), 0.0);
490 //make matrix with scores in it
491 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
492 for (int i = 0; i < m->getNumGroups(); i++) {
493 stddists[i].resize(m->getNumGroups(), 0.0);
496 //flip it so you can print it
498 for (int r=0; r<m->getNumGroups(); r++) {
499 for (int l = 0; l < r; l++) {
500 avedists[r][l] = averages[count];
501 avedists[l][r] = averages[count];
502 stddists[r][l] = stdDev[count];
503 stddists[l][r] = stdDev[count];
508 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
509 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
510 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
512 m->openOutputFile(aveFileName, out);
514 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
515 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
516 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
518 m->openOutputFile(stdFileName, outStd);
520 if ((outputForm == "lt") || (outputForm == "square")) {
522 out << m->getNumGroups() << endl;
523 outStd << m->getNumGroups() << endl;
527 for (int r=0; r<m->getNumGroups(); r++) {
529 string name = (m->getGroups())[r];
530 if (name.length() < 10) { //pad with spaces to make compatible
531 while (name.length() < 10) { name += " "; }
534 if (outputForm == "lt") {
536 outStd << name << '\t';
539 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
540 out << endl; outStd << endl;
541 }else if (outputForm == "square") {
543 outStd << name << '\t';
546 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
547 out << endl; outStd << endl;
550 for (int l = 0; l < r; l++) {
551 string otherName = (m->getGroups())[l];
552 if (otherName.length() < 10) { //pad with spaces to make compatible
553 while (otherName.length() < 10) { otherName += " "; }
556 out << name << '\t' << otherName << avedists[r][l] << endl;
557 outStd << name << '\t' << otherName << stddists[r][l] << endl;
566 catch(exception& e) {
567 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
572 /**************************************************************************************************/
573 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
576 //used in tree constructor
579 //create treemap class from groupmap for tree class to use
581 newTmap.makeSim(m->getGroups());
583 //clear old tree names if any
584 m->Treenames.clear();
586 //fills globaldatas tree names
587 m->Treenames = m->getGroups();
589 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
591 if (m->control_pressed) { return 0; }
594 Tree* conTree = con.getTree(newTrees);
596 //create a new filename
597 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");
598 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
600 m->openOutputFile(conFile, outTree);
602 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
608 catch(exception& e) {
609 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
613 /**************************************************************************************************/
615 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
620 //create a new filename
621 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");
622 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
625 m->openOutputFile(outputFile, outAll);
628 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
630 if (m->control_pressed) { break; }
632 //make matrix with scores in it
633 vector< vector<double> > sims; sims.resize(m->getNumGroups());
634 for (int j = 0; j < m->getNumGroups(); j++) {
635 sims[j].resize(m->getNumGroups(), 0.0);
639 for (int r=0; r<m->getNumGroups(); r++) {
640 for (int l = 0; l < r; l++) {
641 double sim = -(dists[i][count]-1.0);
649 Tree* tempTree = new Tree(&mytmap, sims);
650 map<string, string> empty;
651 tempTree->assembleTree(empty);
653 trees.push_back(tempTree);
656 tempTree->print(outAll);
661 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
665 catch(exception& e) {
666 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
670 /**************************************************************************************************/
672 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
674 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
676 Unweighted unweighted(includeRoot);
678 //get unweighted scores for random trees - if random is false iters = 0
679 for (int j = 0; j < iters; j++) {
681 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
682 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
684 if (m->control_pressed) { return 0; }
686 for(int k = 0; k < numComp; k++) {
687 //add trees unweighted score to map of scores
688 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
689 if (it != rscoreFreq[k].end()) {//already have that score
690 rscoreFreq[k][randomData[k]]++;
691 }else{//first time we have seen this score
692 rscoreFreq[k][randomData[k]] = 1;
695 //add randoms score to validscores
696 validScores[randomData[k]] = randomData[k];
700 for(int a = 0; a < numComp; a++) {
701 float rcumul = 1.0000;
703 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
704 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
705 //make rscoreFreq map and rCumul
706 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
707 rCumul[a][it->first] = rcumul;
708 //get percentage of random trees with that info
709 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
710 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
712 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
717 catch(exception& e) {
718 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
722 /***********************************************************/
723 void UnifracUnweightedCommand::printUnweightedFile() {
728 tags.push_back("Score");
729 tags.push_back("RandFreq"); tags.push_back("RandCumul");
731 for(int a = 0; a < numComp; a++) {
732 output->initFile(groupComb[a], tags);
734 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
735 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
736 output->output(data);
742 catch(exception& e) {
743 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
748 /***********************************************************/
749 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
753 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
757 for(int a = 0; a < numComp; a++) {
758 outSum << i+1 << '\t';
759 m->mothurOut(toString(i+1) + "\t");
762 if (UWScoreSig[a][0] > (1/(float)iters)) {
763 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
764 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
765 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
767 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
768 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
769 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
772 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
773 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
774 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
779 catch(exception& e) {
780 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
784 /***********************************************************/
785 void UnifracUnweightedCommand::createPhylipFile(int i) {
787 string phylipFileName;
788 if ((outputForm == "lt") || (outputForm == "square")) {
789 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
790 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
792 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
793 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
797 m->openOutputFile(phylipFileName, out);
799 if ((outputForm == "lt") || (outputForm == "square")) {
801 out << m->getNumGroups() << endl;
804 //make matrix with scores in it
805 vector< vector<float> > dists; dists.resize(m->getNumGroups());
806 for (int i = 0; i < m->getNumGroups(); i++) {
807 dists[i].resize(m->getNumGroups(), 0.0);
810 //flip it so you can print it
812 for (int r=0; r<m->getNumGroups(); r++) {
813 for (int l = 0; l < r; l++) {
814 dists[r][l] = utreeScores[count][0];
815 dists[l][r] = utreeScores[count][0];
821 for (int r=0; r<m->getNumGroups(); r++) {
823 string name = (m->getGroups())[r];
824 if (name.length() < 10) { //pad with spaces to make compatible
825 while (name.length() < 10) { name += " "; }
828 if (outputForm == "lt") {
832 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
834 }else if (outputForm == "square") {
838 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
842 for (int l = 0; l < r; l++) {
843 string otherName = (m->getGroups())[l];
844 if (otherName.length() < 10) { //pad with spaces to make compatible
845 while (otherName.length() < 10) { otherName += " "; }
848 out << name << '\t' << otherName << dists[r][l] << endl;
854 catch(exception& e) {
855 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
859 /***********************************************************/