2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
12 #include "subsample.h"
13 #include "consensus.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracUnweightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
26 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
27 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
28 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracUnweightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
54 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
55 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results of the subsampling, as well as a consensus tree built from these trees. Default=F.\n";
56 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracUnweightedCommand::getOutputFileNameTag(string type, string inputName=""){
70 string outputFileName = "";
71 map<string, vector<string> >::iterator it;
73 //is this a type this command creates
74 it = outputTypes.find(type);
75 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
77 if (type == "unweighted") { outputFileName = "unweighted"; }
78 else if (type == "uwsummary") { outputFileName = "uwsummary"; }
79 else if (type == "phylip") { outputFileName = "dist"; }
80 else if (type == "column") { outputFileName = "dist"; }
81 else if (type == "tree") { outputFileName = "tre"; }
82 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
84 return outputFileName;
87 m->errorOut(e, "UnifracUnweightedCommand", "getOutputFileNameTag");
92 //**********************************************************************************************************************
93 UnifracUnweightedCommand::UnifracUnweightedCommand(){
95 abort = true; calledHelp = true;
97 vector<string> tempOutNames;
98 outputTypes["unweighted"] = tempOutNames;
99 outputTypes["uwsummary"] = tempOutNames;
100 outputTypes["phylip"] = tempOutNames;
101 outputTypes["column"] = tempOutNames;
102 outputTypes["tree"] = tempOutNames;
104 catch(exception& e) {
105 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
109 /***********************************************************/
110 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
112 abort = false; calledHelp = false;
115 //allow user to run help
116 if(option == "help") { help(); abort = true; calledHelp = true; }
117 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120 vector<string> myArray = setParameters();
122 OptionParser parser(option);
123 map<string,string> parameters = parser.getParameters();
124 map<string,string>::iterator it;
126 ValidParameters validParameter;
128 //check to make sure all parameters are valid for command
129 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
130 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
133 //initialize outputTypes
134 vector<string> tempOutNames;
135 outputTypes["unweighted"] = tempOutNames;
136 outputTypes["uwsummary"] = tempOutNames;
137 outputTypes["phylip"] = tempOutNames;
138 outputTypes["column"] = tempOutNames;
139 outputTypes["tree"] = tempOutNames;
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
146 it = parameters.find("tree");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["tree"] = inputDir + it->second; }
154 it = parameters.find("group");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["group"] = inputDir + it->second; }
162 it = parameters.find("name");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["name"] = inputDir + it->second; }
170 it = parameters.find("count");
171 //user has given a template file
172 if(it != parameters.end()){
173 path = m->hasPath(it->second);
174 //if the user has not given a path then, add inputdir. else leave path alone.
175 if (path == "") { parameters["count"] = inputDir + it->second; }
179 //check for required parameters
180 treefile = validParameter.validFile(parameters, "tree", true);
181 if (treefile == "not open") { abort = true; }
182 else if (treefile == "not found") { //if there is a current design file, use it
183 treefile = m->getTreeFile();
184 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
185 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
186 }else { m->setTreeFile(treefile); }
188 //check for required parameters
189 groupfile = validParameter.validFile(parameters, "group", true);
190 if (groupfile == "not open") { abort = true; }
191 else if (groupfile == "not found") { groupfile = ""; }
192 else { m->setGroupFile(groupfile); }
194 namefile = validParameter.validFile(parameters, "name", true);
195 if (namefile == "not open") { namefile = ""; abort = true; }
196 else if (namefile == "not found") { namefile = ""; }
197 else { m->setNameFile(namefile); }
199 countfile = validParameter.validFile(parameters, "count", true);
200 if (countfile == "not open") { countfile = ""; abort = true; }
201 else if (countfile == "not found") { countfile = ""; }
202 else { m->setCountTableFile(countfile); }
204 if ((namefile != "") && (countfile != "")) {
205 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
208 if ((groupfile != "") && (countfile != "")) {
209 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
212 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
214 //check for optional parameter and set defaults
215 // ...at some point should added some additional type checking...
216 groups = validParameter.validFile(parameters, "groups", false);
217 if (groups == "not found") { groups = ""; }
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
224 m->mothurConvert(itersString, iters);
226 string temp = validParameter.validFile(parameters, "distance", false);
227 if (temp == "not found") { phylip = false; outputForm = ""; }
229 if (temp=="phylip") { temp = "lt"; }
230 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
231 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
234 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
235 random = m->isTrue(temp);
237 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
238 includeRoot = m->isTrue(temp);
240 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
241 m->setProcessors(temp);
242 m->mothurConvert(temp, processors);
244 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
245 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
247 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
248 else { subsample = false; }
251 if (!subsample) { subsampleIters = 0; }
252 else { subsampleIters = iters; }
254 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
255 consensus = m->isTrue(temp);
257 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
258 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
261 if ((!testCt.testGroups(countfile)) && (subsample)) {
262 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
265 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
268 if (!random) { iters = 0; } //turn off random calcs
270 //if user selects distance = true and no groups it won't calc the pairwise
271 if ((phylip) && (Groups.size() == 0)) {
273 m->splitAtDash(groups, Groups);
274 m->setGroups(Groups);
278 if (namefile == "") {
279 vector<string> files; files.push_back(treefile);
280 parser.getNameFile(files);
286 catch(exception& e) {
287 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
292 /***********************************************************/
293 int UnifracUnweightedCommand::execute() {
296 if (abort == true) { if (calledHelp) { return 0; } return 2; }
298 m->setTreeFile(treefile);
301 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
302 else { reader = new TreeReader(treefile, countfile); }
303 T = reader->getTrees();
304 ct = T[0]->getCountTable();
307 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
308 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
309 m->openOutputFile(sumFile, outSum);
312 Groups = m->getGroups();
313 vector<string> namesGroups = ct->getNamesOfGroups();
314 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
316 Unweighted unweighted(includeRoot);
318 int start = time(NULL);
322 //user has not set size, set size = smallest samples size
323 if (subsampleSize == -1) {
324 vector<string> temp; temp.push_back(Groups[0]);
325 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
326 for (int i = 1; i < Groups.size(); i++) {
327 int thisSize = ct->getGroupCount(Groups[i]);
328 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
330 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
331 }else { //eliminate any too small groups
332 vector<string> newGroups = Groups;
334 for (int i = 0; i < newGroups.size(); i++) {
335 int thisSize = ct->getGroupCount(newGroups[i]);
337 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
338 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
340 m->setGroups(Groups);
344 util.getCombos(groupComb, Groups, numComp);
345 m->setGroups(Groups);
347 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
349 if (numComp < processors) { processors = numComp; }
351 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
353 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
354 m->mothurOut("Tree#\tGroups\tUWScore\t");
355 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
356 outSum << endl; m->mothurOutEndLine();
358 //get pscores for users trees
359 for (int i = 0; i < T.size(); i++) {
360 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
365 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"), itersString);
366 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
367 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("unweighted"));
371 //get unweighted for users tree
372 rscoreFreq.resize(numComp);
373 rCumul.resize(numComp);
374 utreeScores.resize(numComp);
375 UWScoreSig.resize(numComp);
377 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
379 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
381 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
383 //output scores for each combination
384 for(int k = 0; k < numComp; k++) {
386 utreeScores[k].push_back(userData[k]);
388 //add users score to validscores
389 validScores[userData[k]] = userData[k];
391 if (!random) { UWScoreSig[k].push_back(0.0); }
394 if (random) { runRandomCalcs(T[i], userData); }
396 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
398 int startSubsample = time(NULL);
401 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
402 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
403 if (m->control_pressed) { break; }
405 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
406 CountTable* newCt = new CountTable();
408 //uses method of setting groups to doNotIncludeMe
410 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
412 //call new weighted function
413 vector<double> iterData; iterData.resize(numComp,0);
414 Unweighted thisUnweighted(includeRoot);
415 iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
417 //save data to make ave dist, std dist
418 calcDistsTotals.push_back(iterData);
421 delete subSampleTree;
423 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
425 if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
427 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
429 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
430 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
433 printUWSummaryFile(i);
434 if (random) { printUnweightedFile(); delete output; }
435 if (phylip) { createPhylipFile(i); }
447 for (int i = 0; i < T.size(); i++) { delete T[i]; }
449 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
451 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
453 //set phylip file as new current phylipfile
455 itTypes = outputTypes.find("phylip");
456 if (itTypes != outputTypes.end()) {
457 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
460 //set column file as new current columnfile
461 itTypes = outputTypes.find("column");
462 if (itTypes != outputTypes.end()) {
463 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
466 m->mothurOutEndLine();
467 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
468 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
469 m->mothurOutEndLine();
474 catch(exception& e) {
475 m->errorOut(e, "UnifracUnweightedCommand", "execute");
479 /**************************************************************************************************/
480 int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
482 //we need to find the average distance and standard deviation for each groups distance
485 vector<double> averages; averages.resize(numComp, 0);
486 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
487 for (int i = 0; i < dists[thisIter].size(); i++) {
488 averages[i] += dists[thisIter][i];
493 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
495 //find standard deviation
496 vector<double> stdDev; stdDev.resize(numComp, 0);
498 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
499 for (int j = 0; j < dists[thisIter].size(); j++) {
500 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
503 for (int i = 0; i < stdDev.size(); i++) {
504 stdDev[i] /= (float) subsampleIters;
505 stdDev[i] = sqrt(stdDev[i]);
508 //make matrix with scores in it
509 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
510 for (int i = 0; i < m->getNumGroups(); i++) {
511 avedists[i].resize(m->getNumGroups(), 0.0);
514 //make matrix with scores in it
515 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
516 for (int i = 0; i < m->getNumGroups(); i++) {
517 stddists[i].resize(m->getNumGroups(), 0.0);
520 //flip it so you can print it
522 for (int r=0; r<m->getNumGroups(); r++) {
523 for (int l = 0; l < r; l++) {
524 avedists[r][l] = averages[count];
525 avedists[l][r] = averages[count];
526 stddists[r][l] = stdDev[count];
527 stddists[l][r] = stdDev[count];
532 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.ave." + getOutputFileNameTag("phylip");
533 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
534 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
536 m->openOutputFile(aveFileName, out);
538 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".unweighted.std." + getOutputFileNameTag("phylip");
539 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
540 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
542 m->openOutputFile(stdFileName, outStd);
544 if ((outputForm == "lt") || (outputForm == "square")) {
546 out << m->getNumGroups() << endl;
547 outStd << m->getNumGroups() << endl;
551 for (int r=0; r<m->getNumGroups(); r++) {
553 string name = (m->getGroups())[r];
554 if (name.length() < 10) { //pad with spaces to make compatible
555 while (name.length() < 10) { name += " "; }
558 if (outputForm == "lt") {
560 outStd << name << '\t';
563 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
564 out << endl; outStd << endl;
565 }else if (outputForm == "square") {
567 outStd << name << '\t';
570 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
571 out << endl; outStd << endl;
574 for (int l = 0; l < r; l++) {
575 string otherName = (m->getGroups())[l];
576 if (otherName.length() < 10) { //pad with spaces to make compatible
577 while (otherName.length() < 10) { otherName += " "; }
580 out << name << '\t' << otherName << avedists[r][l] << endl;
581 outStd << name << '\t' << otherName << stddists[r][l] << endl;
590 catch(exception& e) {
591 m->errorOut(e, "UnifracUnweightedCommand", "getAverageSTDMatrices");
596 /**************************************************************************************************/
597 int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
600 //used in tree constructor
603 //create treemap class from groupmap for tree class to use
606 map<string, string> groupMap;
608 for (int i = 0; i < m->getGroups().size(); i++) {
609 nameMap.insert(m->getGroups()[i]);
610 gps.insert(m->getGroups()[i]);
611 groupMap[m->getGroups()[i]] = m->getGroups()[i];
613 newCt.createTable(nameMap, groupMap, gps);
615 //clear old tree names if any
616 m->Treenames.clear();
618 //fills globaldatas tree names
619 m->Treenames = m->getGroups();
621 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
623 if (m->control_pressed) { return 0; }
626 Tree* conTree = con.getTree(newTrees);
628 //create a new filename
629 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.cons." + getOutputFileNameTag("tree");
630 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
632 m->openOutputFile(conFile, outTree);
634 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
640 catch(exception& e) {
641 m->errorOut(e, "UnifracUnweightedCommand", "getConsensusTrees");
645 /**************************************************************************************************/
647 vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
652 //create a new filename
653 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".unweighted.all." + getOutputFileNameTag("tree");
654 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
657 m->openOutputFile(outputFile, outAll);
660 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
662 if (m->control_pressed) { break; }
664 //make matrix with scores in it
665 vector< vector<double> > sims; sims.resize(m->getNumGroups());
666 for (int j = 0; j < m->getNumGroups(); j++) {
667 sims[j].resize(m->getNumGroups(), 0.0);
671 for (int r=0; r<m->getNumGroups(); r++) {
672 for (int l = 0; l < r; l++) {
673 double sim = -(dists[i][count]-1.0);
681 Tree* tempTree = new Tree(&myct, sims);
682 tempTree->assembleTree();
684 trees.push_back(tempTree);
687 tempTree->print(outAll);
692 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
696 catch(exception& e) {
697 m->errorOut(e, "UnifracUnweightedCommand", "buildTrees");
701 /**************************************************************************************************/
703 int UnifracUnweightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
705 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
707 Unweighted unweighted(includeRoot);
709 //get unweighted scores for random trees - if random is false iters = 0
710 for (int j = 0; j < iters; j++) {
712 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
713 randomData = unweighted.getValues(thisTree, "", "", processors, outputDir);
715 if (m->control_pressed) { return 0; }
717 for(int k = 0; k < numComp; k++) {
718 //add trees unweighted score to map of scores
719 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
720 if (it != rscoreFreq[k].end()) {//already have that score
721 rscoreFreq[k][randomData[k]]++;
722 }else{//first time we have seen this score
723 rscoreFreq[k][randomData[k]] = 1;
726 //add randoms score to validscores
727 validScores[randomData[k]] = randomData[k];
731 for(int a = 0; a < numComp; a++) {
732 float rcumul = 1.0000;
734 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
735 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
736 //make rscoreFreq map and rCumul
737 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
738 rCumul[a][it->first] = rcumul;
739 //get percentage of random trees with that info
740 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
741 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
743 UWScoreSig[a].push_back(rCumul[a][usersScores[a]]);
748 catch(exception& e) {
749 m->errorOut(e, "UnifracUnweightedCommand", "runRandomCalcs");
753 /***********************************************************/
754 void UnifracUnweightedCommand::printUnweightedFile() {
759 tags.push_back("Score");
760 tags.push_back("RandFreq"); tags.push_back("RandCumul");
762 for(int a = 0; a < numComp; a++) {
763 output->initFile(groupComb[a], tags);
765 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
766 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
767 output->output(data);
773 catch(exception& e) {
774 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
779 /***********************************************************/
780 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
784 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
788 for(int a = 0; a < numComp; a++) {
789 outSum << i+1 << '\t';
790 m->mothurOut(toString(i+1) + "\t");
793 if (UWScoreSig[a][0] > (1/(float)iters)) {
794 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
795 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
796 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
798 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
799 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
800 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
803 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
804 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
805 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
810 catch(exception& e) {
811 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
815 /***********************************************************/
816 void UnifracUnweightedCommand::createPhylipFile(int i) {
818 string phylipFileName;
819 if ((outputForm == "lt") || (outputForm == "square")) {
820 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip." + getOutputFileNameTag("phylip");
821 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
823 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column." + getOutputFileNameTag("column");
824 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
828 m->openOutputFile(phylipFileName, out);
830 if ((outputForm == "lt") || (outputForm == "square")) {
832 out << m->getNumGroups() << endl;
835 //make matrix with scores in it
836 vector< vector<float> > dists; dists.resize(m->getNumGroups());
837 for (int i = 0; i < m->getNumGroups(); i++) {
838 dists[i].resize(m->getNumGroups(), 0.0);
841 //flip it so you can print it
843 for (int r=0; r<m->getNumGroups(); r++) {
844 for (int l = 0; l < r; l++) {
845 dists[r][l] = utreeScores[count][0];
846 dists[l][r] = utreeScores[count][0];
852 for (int r=0; r<m->getNumGroups(); r++) {
854 string name = (m->getGroups())[r];
855 if (name.length() < 10) { //pad with spaces to make compatible
856 while (name.length() < 10) { name += " "; }
859 if (outputForm == "lt") {
863 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
865 }else if (outputForm == "square") {
869 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
873 for (int l = 0; l < r; l++) {
874 string otherName = (m->getGroups())[l];
875 if (otherName.length() < 10) { //pad with spaces to make compatible
876 while (otherName.length() < 10) { otherName += " "; }
879 out << name << '\t' << otherName << dists[r][l] << endl;
885 catch(exception& e) {
886 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
890 /***********************************************************/