2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","unweighted-uwsummary",false,true,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
26 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
27 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
28 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputPattern(string type) {
71 if (type == "unweighted") { pattern = "[filename],unweighted-[filename],[tag],unweighted"; }
72 else if (type == "uwsummary") { pattern = "[filename],uwsummary"; }
73 else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
74 else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
75 else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "UnifracUnweightedCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 UnifracUnweightedCommand::UnifracUnweightedCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["unweighted"] = tempOutNames;
92 outputTypes["uwsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
95 outputTypes["tree"] = tempOutNames;
98 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
102 /***********************************************************/
103 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
105 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
117 map<string,string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["unweighted"] = tempOutNames;
129 outputTypes["uwsummary"] = tempOutNames;
130 outputTypes["phylip"] = tempOutNames;
131 outputTypes["column"] = tempOutNames;
132 outputTypes["tree"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("tree");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["tree"] = inputDir + it->second; }
147 it = parameters.find("group");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["group"] = inputDir + it->second; }
155 it = parameters.find("name");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["name"] = inputDir + it->second; }
163 it = parameters.find("count");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["count"] = inputDir + it->second; }
172 //check for required parameters
173 treefile = validParameter.validFile(parameters, "tree", true);
174 if (treefile == "not open") { abort = true; }
175 else if (treefile == "not found") { //if there is a current design file, use it
176 treefile = m->getTreeFile();
177 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else { m->setTreeFile(treefile); }
181 //check for required parameters
182 groupfile = validParameter.validFile(parameters, "group", true);
183 if (groupfile == "not open") { abort = true; }
184 else if (groupfile == "not found") { groupfile = ""; }
185 else { m->setGroupFile(groupfile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { namefile = ""; abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 countfile = validParameter.validFile(parameters, "count", true);
193 if (countfile == "not open") { countfile = ""; abort = true; }
194 else if (countfile == "not found") { countfile = ""; }
195 else { m->setCountTableFile(countfile); }
197 if ((namefile != "") && (countfile != "")) {
198 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
201 if ((groupfile != "") && (countfile != "")) {
202 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
205 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
207 //check for optional parameter and set defaults
208 // ...at some point should added some additional type checking...
209 groups = validParameter.validFile(parameters, "groups", false);
210 if (groups == "not found") { groups = ""; }
212 m->splitAtDash(groups, Groups);
213 m->setGroups(Groups);
216 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
217 m->mothurConvert(itersString, iters);
219 string temp = validParameter.validFile(parameters, "distance", false);
220 if (temp == "not found") { phylip = false; outputForm = ""; }
222 if (temp=="phylip") { temp = "lt"; }
223 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
224 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
227 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
228 random = m->isTrue(temp);
230 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
231 includeRoot = m->isTrue(temp);
233 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
234 m->setProcessors(temp);
235 m->mothurConvert(temp, processors);
237 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
238 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
240 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
241 else { subsample = false; }
244 if (!subsample) { subsampleIters = 0; }
245 else { subsampleIters = iters; }
247 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
248 consensus = m->isTrue(temp);
250 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
251 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
254 if ((!testCt.testGroups(countfile)) && (subsample)) {
255 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
258 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
259 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
261 if (!random) { iters = 0; } //turn off random calcs
263 //if user selects distance = true and no groups it won't calc the pairwise
264 if ((phylip) && (Groups.size() == 0)) {
266 m->splitAtDash(groups, Groups);
267 m->setGroups(Groups);
271 if (namefile == "") {
272 vector<string> files; files.push_back(treefile);
273 parser.getNameFile(files);
279 catch(exception& e) {
280 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
285 /***********************************************************/
286 int UnifracUnweightedCommand::execute() {
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 m->setTreeFile(treefile);
294 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
295 else { reader = new TreeReader(treefile, countfile); }
296 T = reader->getTrees();
297 ct = T[0]->getCountTable();
300 map<string, string> variables;
301 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
302 sumFile = getOutputFileName("uwsummary",variables);
303 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
304 m->openOutputFile(sumFile, outSum);
307 Groups = m->getGroups();
308 vector<string> namesGroups = ct->getNamesOfGroups();
309 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
311 Unweighted unweighted(includeRoot);
313 int start = time(NULL);
317 //user has not set size, set size = smallest samples size
318 if (subsampleSize == -1) {
319 vector<string> temp; temp.push_back(Groups[0]);
320 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
321 for (int i = 1; i < Groups.size(); i++) {
322 int thisSize = ct->getGroupCount(Groups[i]);
323 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
325 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
326 }else { //eliminate any too small groups
327 vector<string> newGroups = Groups;
329 for (int i = 0; i < newGroups.size(); i++) {
330 int thisSize = ct->getGroupCount(newGroups[i]);
332 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
333 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
335 m->setGroups(Groups);
339 util.getCombos(groupComb, Groups, numComp);
340 m->setGroups(Groups);
342 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
344 if (numComp < processors) { processors = numComp; }
346 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
348 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
349 m->mothurOut("Tree#\tGroups\tUWScore\t");
350 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
351 outSum << endl; m->mothurOutEndLine();
353 //get pscores for users trees
354 for (int i = 0; i < T.size(); i++) {
355 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
360 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
361 variables["[tag]"] = toString(i+1);
362 string unFileName = getOutputFileName("unweighted", variables);
363 output = new ColumnFile(unFileName, itersString);
364 outputNames.push_back(unFileName); outputTypes["unweighted"].push_back(unFileName);
368 //get unweighted for users tree
369 rscoreFreq.resize(numComp);
370 rCumul.resize(numComp);
371 utreeScores.resize(numComp);
372 UWScoreSig.resize(numComp);
374 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
376 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
378 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
380 //output scores for each combination
381 for(int k = 0; k < numComp; k++) {
383 utreeScores[k].push_back(userData[k]);
385 //add users score to validscores
386 validScores[userData[k]] = userData[k];
388 if (!random) { UWScoreSig[k].push_back(0.0); }
391 if (random) { runRandomCalcs(T[i], userData); }
393 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
395 int startSubsample = time(NULL);
398 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
399 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
400 if (m->control_pressed) { break; }
402 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
403 CountTable* newCt = new CountTable();
405 //uses method of setting groups to doNotIncludeMe
407 if (m->debug) { sampleTime = time(NULL); }
409 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
411 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
413 //call new weighted function
414 vector<double> iterData; iterData.resize(numComp,0);
415 Unweighted thisUnweighted(includeRoot);
416 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
418 //save data to make ave dist, std dist
419 calcDistsTotals.push_back(iterData);
422 delete subSampleTree;
424 if((thisIter+1) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); }
426 if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
428 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
430 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
431 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
434 printUWSummaryFile(i);
435 if (random) { printUnweightedFile(); delete output; }
436 if (phylip) { createPhylipFile(i); }
448 for (int i = 0; i < T.size(); i++) { delete T[i]; }
450 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
452 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
454 //set phylip file as new current phylipfile
456 itTypes = outputTypes.find("phylip");
457 if (itTypes != outputTypes.end()) {
458 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
461 //set column file as new current columnfile
462 itTypes = outputTypes.find("column");
463 if (itTypes != outputTypes.end()) {
464 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
467 m->mothurOutEndLine();
468 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
469 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
470 m->mothurOutEndLine();
475 catch(exception& e) {
476 m->errorOut(e, "UnifracUnweightedCommand", "execute");
480 /**************************************************************************************************/
481 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
483 //we need to find the average distance and standard deviation for each groups distance
485 vector<double> averages = m->getAverages(dists);
487 //find standard deviation
488 vector<double> stdDev = m->getStandardDeviation(dists, averages);
490 //make matrix with scores in it
491 vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
492 for (int i = 0; i < m->getNumGroups(); i++) {
494 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
495 avedists.push_back(temp);
498 //make matrix with scores in it
499 vector< vector<double> > stddists; //stddists.resize(m->getNumGroups());
500 for (int i = 0; i < m->getNumGroups(); i++) {
502 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
503 //stddists[i].resize(m->getNumGroups(), 0.0);
504 stddists.push_back(temp);
507 if (m->debug) { m->mothurOut("[DEBUG]: about to fill matrix.\n"); }
509 //flip it so you can print it
511 for (int r=0; r<m->getNumGroups(); r++) {
512 for (int l = 0; l < r; l++) {
513 avedists[r][l] = averages[count];
514 avedists[l][r] = averages[count];
515 stddists[r][l] = stdDev[count];
516 stddists[l][r] = stdDev[count];
521 if (m->debug) { m->mothurOut("[DEBUG]: done filling matrix.\n"); }
523 map<string, string> variables;
524 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
525 variables["[tag]"] = toString(treeNum+1);
526 variables["[tag2]"] = "unweighted.ave";
527 string aveFileName = getOutputFileName("phylip",variables);
528 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
529 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
531 m->openOutputFile(aveFileName, out);
533 variables["[tag2]"] = "unweighted.std";
534 string stdFileName = getOutputFileName("phylip",variables);
535 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
536 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
538 m->openOutputFile(stdFileName, outStd);
540 if ((outputForm == "lt") || (outputForm == "square")) {
542 out << m->getNumGroups() << endl;
543 outStd << m->getNumGroups() << endl;
547 for (int r=0; r<m->getNumGroups(); r++) {
549 string name = (m->getGroups())[r];
550 if (name.length() < 10) { //pad with spaces to make compatible
551 while (name.length() < 10) { name += " "; }
554 if (outputForm == "lt") {
556 outStd << name << '\t';
559 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
560 out << endl; outStd << endl;
561 }else if (outputForm == "square") {
563 outStd << name << '\t';
566 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
567 out << endl; outStd << endl;
570 for (int l = 0; l < r; l++) {
571 string otherName = (m->getGroups())[l];
572 if (otherName.length() < 10) { //pad with spaces to make compatible
573 while (otherName.length() < 10) { otherName += " "; }
576 out << name << '\t' << otherName << avedists[r][l] << endl;
577 outStd << name << '\t' << otherName << stddists[r][l] << endl;
586 catch(exception& e) {
587 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
592 /**************************************************************************************************/
593 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
596 //used in tree constructor
599 //create treemap class from groupmap for tree class to use
602 map<string, string> groupMap;
604 for (int i = 0; i < m->getGroups().size(); i++) {
605 nameMap.insert(m->getGroups()[i]);
606 gps.insert(m->getGroups()[i]);
607 groupMap[m->getGroups()[i]] = m->getGroups()[i];
609 newCt.createTable(nameMap, groupMap, gps);
611 //clear old tree names if any
612 m->Treenames.clear();
614 //fills globaldatas tree names
615 m->Treenames = m->getGroups();
617 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
619 if (m->control_pressed) { return 0; }
622 Tree* conTree = con.getTree(newTrees);
624 //create a new filename
625 map<string, string> variables;
626 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
627 variables["[tag]"] = toString(treeNum+1);
628 variables["[tag2]"] = "unweighted.cons";
629 string conFile = getOutputFileName("tree",variables);
630 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
632 m->openOutputFile(conFile, outTree);
634 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
640 catch(exception& e) {
641 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
645 /**************************************************************************************************/
647 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
652 //create a new filename
653 map<string, string> variables;
654 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
655 variables["[tag]"] = toString(treeNum+1);
656 variables["[tag2]"] = "unweighted.all";
657 string outputFile = getOutputFileName("tree",variables);
658 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
661 m->openOutputFile(outputFile, outAll);
664 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
666 if (m->control_pressed) { break; }
668 //make matrix with scores in it
669 vector< vector<double> > sims; sims.resize(m->getNumGroups());
670 for (int j = 0; j < m->getNumGroups(); j++) {
671 sims[j].resize(m->getNumGroups(), 0.0);
675 for (int r=0; r<m->getNumGroups(); r++) {
676 for (int l = 0; l < r; l++) {
677 double sim = -(dists[i][count]-1.0);
685 Tree* tempTree = new Tree(&myct, sims);
686 tempTree->assembleTree();
688 trees.push_back(tempTree);
691 tempTree->print(outAll);
696 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
700 catch(exception& e) {
701 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
705 /**************************************************************************************************/
707 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
709 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
711 Unweighted unweighted(includeRoot);
713 //get unweighted scores for random trees - if random is false iters = 0
714 for (int j = 0; j < iters; j++) {
716 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
717 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
719 if (m->control_pressed) { return 0; }
721 for(int k = 0; k < numComp; k++) {
722 //add trees unweighted score to map of scores
723 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
724 if (it != rscoreFreq[k].end()) {//already have that score
725 rscoreFreq[k][randomData[k]]++;
726 }else{//first time we have seen this score
727 rscoreFreq[k][randomData[k]] = 1;
730 //add randoms score to validscores
731 validScores[randomData[k]] = randomData[k];
735 for(int a = 0; a < numComp; a++) {
736 float rcumul = 1.0000;
738 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
739 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
740 //make rscoreFreq map and rCumul
741 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
742 rCumul[a][it->first] = rcumul;
743 //get percentage of random trees with that info
744 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
745 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
747 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
752 catch(exception& e) {
753 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
757 /***********************************************************/
758 void UnifracUnweightedCommand::printUnweightedFile() {
763 tags.push_back("Score");
764 tags.push_back("RandFreq"); tags.push_back("RandCumul");
766 for(int a = 0; a < numComp; a++) {
767 output->initFile(groupComb[a], tags);
769 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
770 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
771 output->output(data);
777 catch(exception& e) {
778 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
783 /***********************************************************/
784 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
788 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
792 for(int a = 0; a < numComp; a++) {
793 outSum << i+1 << '\t';
794 m->mothurOut(toString(i+1) + "\t");
797 if (UWScoreSig[a][0] > (1/(float)iters)) {
798 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
799 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
800 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
802 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
803 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
804 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
807 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
808 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
809 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
814 catch(exception& e) {
815 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
819 /***********************************************************/
820 void UnifracUnweightedCommand::createPhylipFile(int i) {
822 string phylipFileName;
823 map<string, string> variables;
824 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
825 variables["[tag]"] = toString(i+1);
826 if ((outputForm == "lt") || (outputForm == "square")) {
827 variables["[tag2]"] = "unweighted.phylip";
828 phylipFileName = getOutputFileName("phylip",variables);
829 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
831 variables["[tag2]"] = "unweighted.column";
832 phylipFileName = getOutputFileName("column",variables);
833 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
837 m->openOutputFile(phylipFileName, out);
839 if ((outputForm == "lt") || (outputForm == "square")) {
841 out << m->getNumGroups() << endl;
844 //make matrix with scores in it
845 vector< vector<float> > dists; dists.resize(m->getNumGroups());
846 for (int i = 0; i < m->getNumGroups(); i++) {
847 dists[i].resize(m->getNumGroups(), 0.0);
850 //flip it so you can print it
852 for (int r=0; r<m->getNumGroups(); r++) {
853 for (int l = 0; l < r; l++) {
854 dists[r][l] = utreeScores[count][0];
855 dists[l][r] = utreeScores[count][0];
861 for (int r=0; r<m->getNumGroups(); r++) {
863 string name = (m->getGroups())[r];
864 if (name.length() < 10) { //pad with spaces to make compatible
865 while (name.length() < 10) { name += " "; }
868 if (outputForm == "lt") {
872 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
874 }else if (outputForm == "square") {
878 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
882 for (int l = 0; l < r; l++) {
883 string otherName = (m->getGroups())[l];
884 if (otherName.length() < 10) { //pad with spaces to make compatible
885 while (otherName.length() < 10) { otherName += " "; }
888 out << name << '\t' << otherName << dists[r][l] << endl;
894 catch(exception& e) {
895 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
899 /***********************************************************/