2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
26 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
27 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
28 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputPattern(string type) {
71 if (type == "weighted") { pattern = "[filename],weighted-[filename],[tag],weighted"; }
72 else if (type == "wsummary") { pattern = "[filename],wsummary"; }
73 else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
74 else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
75 else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 UnifracWeightedCommand::UnifracWeightedCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["weighted"] = tempOutNames;
92 outputTypes["wsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
95 outputTypes["tree"] = tempOutNames;
98 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
103 /***********************************************************/
104 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters=parser.getParameters();
117 map<string,string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["weighted"] = tempOutNames;
129 outputTypes["wsummary"] = tempOutNames;
130 outputTypes["phylip"] = tempOutNames;
131 outputTypes["column"] = tempOutNames;
132 outputTypes["tree"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("tree");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["tree"] = inputDir + it->second; }
147 it = parameters.find("group");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["group"] = inputDir + it->second; }
155 it = parameters.find("name");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["name"] = inputDir + it->second; }
163 it = parameters.find("count");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["count"] = inputDir + it->second; }
172 //check for required parameters
173 treefile = validParameter.validFile(parameters, "tree", true);
174 if (treefile == "not open") { treefile = ""; abort = true; }
175 else if (treefile == "not found") { //if there is a current design file, use it
176 treefile = m->getTreeFile();
177 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else { m->setTreeFile(treefile); }
181 //check for required parameters
182 groupfile = validParameter.validFile(parameters, "group", true);
183 if (groupfile == "not open") { abort = true; }
184 else if (groupfile == "not found") { groupfile = ""; }
185 else { m->setGroupFile(groupfile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { namefile = ""; abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 countfile = validParameter.validFile(parameters, "count", true);
193 if (countfile == "not open") { countfile = ""; abort = true; }
194 else if (countfile == "not found") { countfile = ""; }
195 else { m->setCountTableFile(countfile); }
197 if ((namefile != "") && (countfile != "")) {
198 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
201 if ((groupfile != "") && (countfile != "")) {
202 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
205 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
210 groups = validParameter.validFile(parameters, "groups", false);
211 if (groups == "not found") { groups = ""; }
213 m->splitAtDash(groups, Groups);
214 m->setGroups(Groups);
217 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
218 m->mothurConvert(itersString, iters);
220 string temp = validParameter.validFile(parameters, "distance", false);
221 if (temp == "not found") { phylip = false; outputForm = ""; }
223 if (temp=="phylip") { temp = "lt"; }
224 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
225 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
228 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
229 random = m->isTrue(temp);
231 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
232 includeRoot = m->isTrue(temp);
234 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
235 m->setProcessors(temp);
236 m->mothurConvert(temp, processors);
238 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
239 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
241 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
242 else { subsample = false; }
245 if (!subsample) { subsampleIters = 0; }
246 else { subsampleIters = iters; }
248 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
249 consensus = m->isTrue(temp);
251 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
252 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
255 if ((!testCt.testGroups(countfile)) && (subsample)) {
256 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
259 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
260 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
263 if (namefile == "") {
264 vector<string> files; files.push_back(treefile);
265 parser.getNameFile(files);
272 catch(exception& e) {
273 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
277 /***********************************************************/
278 int UnifracWeightedCommand::execute() {
281 if (abort == true) { if (calledHelp) { return 0; } return 2; }
283 m->setTreeFile(treefile);
286 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
287 else { reader = new TreeReader(treefile, countfile); }
288 T = reader->getTrees();
289 ct = T[0]->getCountTable();
292 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
294 map<string, string> variables;
295 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
296 sumFile = getOutputFileName("wsummary",variables);
297 m->openOutputFile(sumFile, outSum);
298 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
301 string s; //to make work with setgroups
302 Groups = m->getGroups();
303 vector<string> nameGroups = ct->getNamesOfGroups();
304 if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
305 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
306 m->setGroups(Groups);
308 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
310 Weighted weighted(includeRoot);
312 int start = time(NULL);
316 //user has not set size, set size = smallest samples size
317 if (subsampleSize == -1) {
318 vector<string> temp; temp.push_back(Groups[0]);
319 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
320 for (int i = 1; i < Groups.size(); i++) {
321 int thisSize = ct->getGroupCount(Groups[i]);
322 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
324 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
325 }else { //eliminate any too small groups
326 vector<string> newGroups = Groups;
328 for (int i = 0; i < newGroups.size(); i++) {
329 int thisSize = ct->getGroupCount(newGroups[i]);
331 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
332 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
334 m->setGroups(Groups);
338 //here in case some groups are removed by subsample
339 util.getCombos(groupComb, Groups, numComp);
341 if (numComp < processors) { processors = numComp; }
343 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
345 //get weighted scores for users trees
346 for (int i = 0; i < T.size(); i++) {
348 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
351 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
352 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
354 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
355 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
358 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
359 variables["[tag]"] = toString(i+1);
360 string wFileName = getOutputFileName("weighted", variables);
361 output = new ColumnFile(wFileName, itersString);
362 outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
365 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
366 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
369 for (int s=0; s<numComp; s++) {
370 //add users score to vector of user scores
371 uScores[s].push_back(userData[s]);
372 //save users tree score for summary file
373 utreeScores.push_back(userData[s]);
376 if (random) { runRandomCalcs(T[i], userData); }
384 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
385 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
386 if (m->control_pressed) { break; }
388 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
389 CountTable* newCt = new CountTable();
392 if (m->debug) { sampleTime = time(NULL); }
394 //uses method of setting groups to doNotIncludeMe
396 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
398 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
400 //call new weighted function
401 vector<double> iterData; iterData.resize(numComp,0);
402 Weighted thisWeighted(includeRoot);
403 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
405 //save data to make ave dist, std dist
406 calcDistsTotals.push_back(iterData);
409 delete subSampleTree;
411 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
414 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
416 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
417 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
421 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
423 if (phylip) { createPhylipFile(); }
427 //clear out users groups
430 for (int i = 0; i < T.size(); i++) { delete T[i]; }
432 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
434 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
436 //set phylip file as new current phylipfile
438 itTypes = outputTypes.find("phylip");
439 if (itTypes != outputTypes.end()) {
440 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
443 //set column file as new current columnfile
444 itTypes = outputTypes.find("column");
445 if (itTypes != outputTypes.end()) {
446 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
449 m->mothurOutEndLine();
450 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
451 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
452 m->mothurOutEndLine();
457 catch(exception& e) {
458 m->errorOut(e, "UnifracWeightedCommand", "execute");
462 /**************************************************************************************************/
463 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
465 //we need to find the average distance and standard deviation for each groups distance
468 vector<double> averages; averages.resize(numComp, 0);
469 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
470 for (int i = 0; i < dists[thisIter].size(); i++) {
471 averages[i] += dists[thisIter][i];
476 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
478 //find standard deviation
479 vector<double> stdDev; stdDev.resize(numComp, 0);
481 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
482 for (int j = 0; j < dists[thisIter].size(); j++) {
483 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
486 for (int i = 0; i < stdDev.size(); i++) {
487 stdDev[i] /= (float) subsampleIters;
488 stdDev[i] = sqrt(stdDev[i]);
491 //make matrix with scores in it
492 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
493 for (int i = 0; i < m->getNumGroups(); i++) {
494 avedists[i].resize(m->getNumGroups(), 0.0);
497 //make matrix with scores in it
498 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
499 for (int i = 0; i < m->getNumGroups(); i++) {
500 stddists[i].resize(m->getNumGroups(), 0.0);
503 //flip it so you can print it
505 for (int r=0; r<m->getNumGroups(); r++) {
506 for (int l = 0; l < r; l++) {
507 avedists[r][l] = averages[count];
508 avedists[l][r] = averages[count];
509 stddists[r][l] = stdDev[count];
510 stddists[l][r] = stdDev[count];
515 map<string, string> variables;
516 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
517 variables["[tag]"] = toString(treeNum+1);
518 variables["[tag2]"] = "weighted.ave";
519 string aveFileName = getOutputFileName("phylip",variables);
520 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
521 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
523 m->openOutputFile(aveFileName, out);
525 variables["[tag2]"] = "weighted.std";
526 string stdFileName = getOutputFileName("phylip",variables);
527 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
528 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
530 m->openOutputFile(stdFileName, outStd);
532 if ((outputForm == "lt") || (outputForm == "square")) {
534 out << m->getNumGroups() << endl;
535 outStd << m->getNumGroups() << endl;
539 for (int r=0; r<m->getNumGroups(); r++) {
541 string name = (m->getGroups())[r];
542 if (name.length() < 10) { //pad with spaces to make compatible
543 while (name.length() < 10) { name += " "; }
546 if (outputForm == "lt") {
548 outStd << name << '\t';
551 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
552 out << endl; outStd << endl;
553 }else if (outputForm == "square") {
555 outStd << name << '\t';
558 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
559 out << endl; outStd << endl;
562 for (int l = 0; l < r; l++) {
563 string otherName = (m->getGroups())[l];
564 if (otherName.length() < 10) { //pad with spaces to make compatible
565 while (otherName.length() < 10) { otherName += " "; }
568 out << name << '\t' << otherName << avedists[r][l] << endl;
569 outStd << name << '\t' << otherName << stddists[r][l] << endl;
578 catch(exception& e) {
579 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
584 /**************************************************************************************************/
585 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
588 //used in tree constructor
591 ///create treemap class from groupmap for tree class to use
594 map<string, string> groupMap;
596 for (int i = 0; i < m->getGroups().size(); i++) {
597 nameMap.insert(m->getGroups()[i]);
598 gps.insert(m->getGroups()[i]);
599 groupMap[m->getGroups()[i]] = m->getGroups()[i];
601 newCt.createTable(nameMap, groupMap, gps);
603 //clear old tree names if any
604 m->Treenames.clear();
606 //fills globaldatas tree names
607 m->Treenames = m->getGroups();
609 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
611 if (m->control_pressed) { return 0; }
614 Tree* conTree = con.getTree(newTrees);
616 //create a new filename
617 map<string, string> variables;
618 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
619 variables["[tag]"] = toString(treeNum+1);
620 variables["[tag2]"] = "weighted.cons";
621 string conFile = getOutputFileName("tree",variables);
622 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
624 m->openOutputFile(conFile, outTree);
626 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
632 catch(exception& e) {
633 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
637 /**************************************************************************************************/
639 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
644 //create a new filename
645 map<string, string> variables;
646 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
647 variables["[tag]"] = toString(treeNum+1);
648 variables["[tag2]"] = "weighted.all";
649 string outputFile = getOutputFileName("tree",variables);
650 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
653 m->openOutputFile(outputFile, outAll);
656 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
658 if (m->control_pressed) { break; }
660 //make matrix with scores in it
661 vector< vector<double> > sims; sims.resize(m->getNumGroups());
662 for (int j = 0; j < m->getNumGroups(); j++) {
663 sims[j].resize(m->getNumGroups(), 0.0);
667 for (int r=0; r<m->getNumGroups(); r++) {
668 for (int l = 0; l < r; l++) {
669 double sim = -(dists[i][count]-1.0);
677 Tree* tempTree = new Tree(&myct, sims);
678 tempTree->assembleTree();
680 trees.push_back(tempTree);
683 tempTree->print(outAll);
688 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
692 catch(exception& e) {
693 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
697 /**************************************************************************************************/
699 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
702 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
703 vector< vector<string> > namesOfGroupCombos;
704 for (int a=0; a<numGroups; a++) {
705 for (int l = 0; l < a; l++) {
706 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
707 namesOfGroupCombos.push_back(groups);
713 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
715 int numPairs = namesOfGroupCombos.size();
716 int numPairsPerProcessor = numPairs / processors;
718 for (int i = 0; i < processors; i++) {
719 int startPos = i * numPairsPerProcessor;
720 if(i == processors - 1){
721 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
723 lines.push_back(linePair(startPos, numPairsPerProcessor));
729 //get scores for random trees
730 for (int j = 0; j < iters; j++) {
732 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
734 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
736 createProcesses(thisTree, namesOfGroupCombos, rScores);
739 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
742 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
745 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
749 //find the signifigance of the score for summary file
750 for (int f = 0; f < numComp; f++) {
752 sort(rScores[f].begin(), rScores[f].end());
754 //the index of the score higher than yours is returned
755 //so if you have 1000 random trees the index returned is 100
756 //then there are 900 trees with a score greater then you.
757 //giving you a signifigance of 0.900
758 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
760 //the signifigance is the number of trees with the users score or higher
761 WScoreSig.push_back((iters-index)/(float)iters);
764 //out << "Tree# " << i << endl;
765 calculateFreqsCumuls();
772 catch(exception& e) {
773 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
777 /**************************************************************************************************/
779 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
781 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
783 vector<int> processIDS;
787 //loop through and create all the processes you want
788 while (process != processors) {
792 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
795 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
797 //pass numSeqs to parent
799 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
800 m->openOutputFile(tempFile, out);
801 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
806 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
807 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
812 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
814 //force parent to wait until all the processes are done
815 for (int i=0;i<(processors-1);i++) {
816 int temp = processIDS[i];
820 //get data created by processes
821 for (int i=0;i<(processors-1);i++) {
824 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
825 m->openInputFile(s, in);
828 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
836 catch(exception& e) {
837 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
842 /**************************************************************************************************/
843 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
845 Tree* randT = new Tree(ct);
847 Weighted weighted(includeRoot);
849 for (int h = start; h < (start+num); h++) {
851 if (m->control_pressed) { return 0; }
853 //initialize weighted score
854 string groupA = namesOfGroupCombos[h][0];
855 string groupB = namesOfGroupCombos[h][1];
860 //create a random tree with same topology as T[i], but different labels
861 randT->assembleRandomUnifracTree(groupA, groupB);
863 if (m->control_pressed) { delete randT; return 0; }
865 //get wscore of random tree
866 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
868 if (m->control_pressed) { delete randT; return 0; }
871 scores[h].push_back(randomData[0]);
879 catch(exception& e) {
880 m->errorOut(e, "UnifracWeightedCommand", "driver");
884 /***********************************************************/
885 void UnifracWeightedCommand::printWeightedFile() {
889 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
891 for(int a = 0; a < numComp; a++) {
892 output->initFile(groupComb[a], tags);
894 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
895 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
896 output->output(data);
902 catch(exception& e) {
903 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
909 /***********************************************************/
910 void UnifracWeightedCommand::printWSummaryFile() {
913 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
914 m->mothurOut("Tree#\tGroups\tWScore\t");
915 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
916 outSum << endl; m->mothurOutEndLine();
919 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
923 for (int i = 0; i < T.size(); i++) {
924 for (int j = 0; j < numComp; j++) {
926 if (WScoreSig[count] > (1/(float)iters)) {
927 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
928 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
929 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
931 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
932 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
933 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
936 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
937 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
938 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
945 catch(exception& e) {
946 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
950 /***********************************************************/
951 void UnifracWeightedCommand::createPhylipFile() {
955 for (int i = 0; i < T.size(); i++) {
957 string phylipFileName;
958 map<string, string> variables;
959 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
960 variables["[tag]"] = toString(i+1);
961 if ((outputForm == "lt") || (outputForm == "square")) {
962 variables["[tag2]"] = "weighted.phylip";
963 phylipFileName = getOutputFileName("phylip",variables);
964 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
966 variables["[tag2]"] = "weighted.column";
967 phylipFileName = getOutputFileName("column",variables);
968 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
973 m->openOutputFile(phylipFileName, out);
975 if ((outputForm == "lt") || (outputForm == "square")) {
977 out << m->getNumGroups() << endl;
980 //make matrix with scores in it
981 vector< vector<float> > dists; dists.resize(m->getNumGroups());
982 for (int i = 0; i < m->getNumGroups(); i++) {
983 dists[i].resize(m->getNumGroups(), 0.0);
986 //flip it so you can print it
987 for (int r=0; r<m->getNumGroups(); r++) {
988 for (int l = 0; l < r; l++) {
989 dists[r][l] = utreeScores[count];
990 dists[l][r] = utreeScores[count];
996 for (int r=0; r<m->getNumGroups(); r++) {
998 string name = (m->getGroups())[r];
999 if (name.length() < 10) { //pad with spaces to make compatible
1000 while (name.length() < 10) { name += " "; }
1003 if (outputForm == "lt") {
1004 out << name << '\t';
1007 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
1009 }else if (outputForm == "square") {
1010 out << name << '\t';
1013 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
1017 for (int l = 0; l < r; l++) {
1018 string otherName = (m->getGroups())[l];
1019 if (otherName.length() < 10) { //pad with spaces to make compatible
1020 while (otherName.length() < 10) { otherName += " "; }
1023 out << name << '\t' << otherName << dists[r][l] << endl;
1030 catch(exception& e) {
1031 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1035 /***********************************************************/
1036 int UnifracWeightedCommand::findIndex(float score, int index) {
1038 for (int i = 0; i < rScores[index].size(); i++) {
1039 if (rScores[index][i] >= score) { return i; }
1041 return rScores[index].size();
1043 catch(exception& e) {
1044 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1049 /***********************************************************/
1051 void UnifracWeightedCommand::calculateFreqsCumuls() {
1053 //clear out old tree values
1055 rScoreFreq.resize(numComp);
1057 rCumul.resize(numComp);
1058 validScores.clear();
1060 //calculate frequency
1061 for (int f = 0; f < numComp; f++) {
1062 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1063 validScores[rScores[f][i]] = rScores[f][i];
1064 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1065 if (it != rScoreFreq[f].end()) {
1066 rScoreFreq[f][rScores[f][i]]++;
1068 rScoreFreq[f][rScores[f][i]] = 1;
1074 for(int a = 0; a < numComp; a++) {
1075 float rcumul = 1.0000;
1076 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1077 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1078 //make rscoreFreq map and rCumul
1079 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1080 rCumul[a][it->first] = rcumul;
1081 //get percentage of random trees with that info
1082 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1083 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1088 catch(exception& e) {
1089 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1093 /***********************************************************/