2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
26 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
27 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
28 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){
70 string outputFileName = "";
71 map<string, vector<string> >::iterator it;
73 //is this a type this command creates
74 it = outputTypes.find(type);
75 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
77 if (type == "weighted") { outputFileName = "weighted"; }
78 else if (type == "wsummary") { outputFileName = "wsummary"; }
79 else if (type == "phylip") { outputFileName = "dist"; }
80 else if (type == "column") { outputFileName = "dist"; }
81 else if (type == "tree") { outputFileName = "tre"; }
82 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
84 return outputFileName;
87 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
91 //**********************************************************************************************************************
92 UnifracWeightedCommand::UnifracWeightedCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["weighted"] = tempOutNames;
98 outputTypes["wsummary"] = tempOutNames;
99 outputTypes["phylip"] = tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["tree"] = tempOutNames;
103 catch(exception& e) {
104 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
109 /***********************************************************/
110 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
112 abort = false; calledHelp = false;
114 //allow user to run help
115 if(option == "help") { help(); abort = true; calledHelp = true; }
116 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119 vector<string> myArray = setParameters();
121 OptionParser parser(option);
122 map<string,string> parameters=parser.getParameters();
123 map<string,string>::iterator it;
125 ValidParameters validParameter;
127 //check to make sure all parameters are valid for command
128 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
132 //initialize outputTypes
133 vector<string> tempOutNames;
134 outputTypes["weighted"] = tempOutNames;
135 outputTypes["wsummary"] = tempOutNames;
136 outputTypes["phylip"] = tempOutNames;
137 outputTypes["column"] = tempOutNames;
138 outputTypes["tree"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("tree");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["tree"] = inputDir + it->second; }
153 it = parameters.find("group");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["group"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
169 it = parameters.find("count");
170 //user has given a template file
171 if(it != parameters.end()){
172 path = m->hasPath(it->second);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { parameters["count"] = inputDir + it->second; }
178 //check for required parameters
179 treefile = validParameter.validFile(parameters, "tree", true);
180 if (treefile == "not open") { treefile = ""; abort = true; }
181 else if (treefile == "not found") { //if there is a current design file, use it
182 treefile = m->getTreeFile();
183 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
184 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
185 }else { m->setTreeFile(treefile); }
187 //check for required parameters
188 groupfile = validParameter.validFile(parameters, "group", true);
189 if (groupfile == "not open") { abort = true; }
190 else if (groupfile == "not found") { groupfile = ""; }
191 else { m->setGroupFile(groupfile); }
193 namefile = validParameter.validFile(parameters, "name", true);
194 if (namefile == "not open") { namefile = ""; abort = true; }
195 else if (namefile == "not found") { namefile = ""; }
196 else { m->setNameFile(namefile); }
198 countfile = validParameter.validFile(parameters, "count", true);
199 if (countfile == "not open") { countfile = ""; abort = true; }
200 else if (countfile == "not found") { countfile = ""; }
201 else { m->setCountTableFile(countfile); }
203 if ((namefile != "") && (countfile != "")) {
204 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
207 if ((groupfile != "") && (countfile != "")) {
208 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
211 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
214 //check for optional parameter and set defaults
215 // ...at some point should added some additional type checking...
216 groups = validParameter.validFile(parameters, "groups", false);
217 if (groups == "not found") { groups = ""; }
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
224 m->mothurConvert(itersString, iters);
226 string temp = validParameter.validFile(parameters, "distance", false);
227 if (temp == "not found") { phylip = false; outputForm = ""; }
229 if (temp=="phylip") { temp = "lt"; }
230 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
231 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
234 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
235 random = m->isTrue(temp);
237 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
238 includeRoot = m->isTrue(temp);
240 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
241 m->setProcessors(temp);
242 m->mothurConvert(temp, processors);
244 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
245 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
247 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
248 else { subsample = false; }
251 if (!subsample) { subsampleIters = 0; }
252 else { subsampleIters = iters; }
254 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
255 consensus = m->isTrue(temp);
257 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
258 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
261 if ((!testCt.testGroups(countfile)) && (subsample)) {
262 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
265 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
269 if (namefile == "") {
270 vector<string> files; files.push_back(treefile);
271 parser.getNameFile(files);
278 catch(exception& e) {
279 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
283 /***********************************************************/
284 int UnifracWeightedCommand::execute() {
287 if (abort == true) { if (calledHelp) { return 0; } return 2; }
289 m->setTreeFile(treefile);
292 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
293 else { reader = new TreeReader(treefile, countfile); }
294 T = reader->getTrees();
295 ct = T[0]->getCountTable();
298 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
300 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
301 m->openOutputFile(sumFile, outSum);
302 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
305 string s; //to make work with setgroups
306 Groups = m->getGroups();
307 vector<string> nameGroups = ct->getNamesOfGroups();
308 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
309 m->setGroups(Groups);
311 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
313 Weighted weighted(includeRoot);
315 int start = time(NULL);
319 //user has not set size, set size = smallest samples size
320 if (subsampleSize == -1) {
321 vector<string> temp; temp.push_back(Groups[0]);
322 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
323 for (int i = 1; i < Groups.size(); i++) {
324 int thisSize = ct->getGroupCount(Groups[i]);
325 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
327 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
328 }else { //eliminate any too small groups
329 vector<string> newGroups = Groups;
331 for (int i = 0; i < newGroups.size(); i++) {
332 int thisSize = ct->getGroupCount(newGroups[i]);
334 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
335 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
337 m->setGroups(Groups);
341 //here in case some groups are removed by subsample
342 util.getCombos(groupComb, Groups, numComp);
344 if (numComp < processors) { processors = numComp; }
346 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
348 //get weighted scores for users trees
349 for (int i = 0; i < T.size(); i++) {
351 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
354 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
355 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
357 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
358 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
361 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);
362 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
363 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
366 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
367 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
370 for (int s=0; s<numComp; s++) {
371 //add users score to vector of user scores
372 uScores[s].push_back(userData[s]);
373 //save users tree score for summary file
374 utreeScores.push_back(userData[s]);
377 if (random) { runRandomCalcs(T[i], userData); }
385 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
386 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
388 if (m->control_pressed) { break; }
390 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
391 CountTable* newCt = new CountTable();
393 //uses method of setting groups to doNotIncludeMe
395 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
397 //call new weighted function
398 vector<double> iterData; iterData.resize(numComp,0);
399 Weighted thisWeighted(includeRoot);
400 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
402 //save data to make ave dist, std dist
403 calcDistsTotals.push_back(iterData);
406 delete subSampleTree;
408 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
411 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
413 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
414 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
418 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
420 if (phylip) { createPhylipFile(); }
424 //clear out users groups
427 for (int i = 0; i < T.size(); i++) { delete T[i]; }
429 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
431 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
433 //set phylip file as new current phylipfile
435 itTypes = outputTypes.find("phylip");
436 if (itTypes != outputTypes.end()) {
437 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
440 //set column file as new current columnfile
441 itTypes = outputTypes.find("column");
442 if (itTypes != outputTypes.end()) {
443 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
446 m->mothurOutEndLine();
447 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
448 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
449 m->mothurOutEndLine();
454 catch(exception& e) {
455 m->errorOut(e, "UnifracWeightedCommand", "execute");
459 /**************************************************************************************************/
460 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
462 //we need to find the average distance and standard deviation for each groups distance
465 vector<double> averages; averages.resize(numComp, 0);
466 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
467 for (int i = 0; i < dists[thisIter].size(); i++) {
468 averages[i] += dists[thisIter][i];
473 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
475 //find standard deviation
476 vector<double> stdDev; stdDev.resize(numComp, 0);
478 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
479 for (int j = 0; j < dists[thisIter].size(); j++) {
480 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
483 for (int i = 0; i < stdDev.size(); i++) {
484 stdDev[i] /= (float) subsampleIters;
485 stdDev[i] = sqrt(stdDev[i]);
488 //make matrix with scores in it
489 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
490 for (int i = 0; i < m->getNumGroups(); i++) {
491 avedists[i].resize(m->getNumGroups(), 0.0);
494 //make matrix with scores in it
495 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
496 for (int i = 0; i < m->getNumGroups(); i++) {
497 stddists[i].resize(m->getNumGroups(), 0.0);
500 //flip it so you can print it
502 for (int r=0; r<m->getNumGroups(); r++) {
503 for (int l = 0; l < r; l++) {
504 avedists[r][l] = averages[count];
505 avedists[l][r] = averages[count];
506 stddists[r][l] = stdDev[count];
507 stddists[l][r] = stdDev[count];
512 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
513 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
514 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
516 m->openOutputFile(aveFileName, out);
518 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
519 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
520 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
522 m->openOutputFile(stdFileName, outStd);
524 if ((outputForm == "lt") || (outputForm == "square")) {
526 out << m->getNumGroups() << endl;
527 outStd << m->getNumGroups() << endl;
531 for (int r=0; r<m->getNumGroups(); r++) {
533 string name = (m->getGroups())[r];
534 if (name.length() < 10) { //pad with spaces to make compatible
535 while (name.length() < 10) { name += " "; }
538 if (outputForm == "lt") {
540 outStd << name << '\t';
543 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
544 out << endl; outStd << endl;
545 }else if (outputForm == "square") {
547 outStd << name << '\t';
550 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
551 out << endl; outStd << endl;
554 for (int l = 0; l < r; l++) {
555 string otherName = (m->getGroups())[l];
556 if (otherName.length() < 10) { //pad with spaces to make compatible
557 while (otherName.length() < 10) { otherName += " "; }
560 out << name << '\t' << otherName << avedists[r][l] << endl;
561 outStd << name << '\t' << otherName << stddists[r][l] << endl;
570 catch(exception& e) {
571 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
576 /**************************************************************************************************/
577 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
580 //used in tree constructor
583 ///create treemap class from groupmap for tree class to use
586 map<string, string> groupMap;
588 for (int i = 0; i < m->getGroups().size(); i++) {
589 nameMap.insert(m->getGroups()[i]);
590 gps.insert(m->getGroups()[i]);
591 groupMap[m->getGroups()[i]] = m->getGroups()[i];
593 newCt.createTable(nameMap, groupMap, gps);
595 //clear old tree names if any
596 m->Treenames.clear();
598 //fills globaldatas tree names
599 m->Treenames = m->getGroups();
601 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
603 if (m->control_pressed) { return 0; }
606 Tree* conTree = con.getTree(newTrees);
608 //create a new filename
609 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");
610 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
612 m->openOutputFile(conFile, outTree);
614 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
620 catch(exception& e) {
621 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
625 /**************************************************************************************************/
627 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
632 //create a new filename
633 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");
634 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
637 m->openOutputFile(outputFile, outAll);
640 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
642 if (m->control_pressed) { break; }
644 //make matrix with scores in it
645 vector< vector<double> > sims; sims.resize(m->getNumGroups());
646 for (int j = 0; j < m->getNumGroups(); j++) {
647 sims[j].resize(m->getNumGroups(), 0.0);
651 for (int r=0; r<m->getNumGroups(); r++) {
652 for (int l = 0; l < r; l++) {
653 double sim = -(dists[i][count]-1.0);
661 Tree* tempTree = new Tree(&myct, sims);
662 tempTree->assembleTree();
664 trees.push_back(tempTree);
667 tempTree->print(outAll);
672 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
676 catch(exception& e) {
677 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
681 /**************************************************************************************************/
683 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
686 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
687 vector< vector<string> > namesOfGroupCombos;
688 for (int a=0; a<numGroups; a++) {
689 for (int l = 0; l < a; l++) {
690 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
691 namesOfGroupCombos.push_back(groups);
697 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
699 int numPairs = namesOfGroupCombos.size();
700 int numPairsPerProcessor = numPairs / processors;
702 for (int i = 0; i < processors; i++) {
703 int startPos = i * numPairsPerProcessor;
704 if(i == processors - 1){
705 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
707 lines.push_back(linePair(startPos, numPairsPerProcessor));
713 //get scores for random trees
714 for (int j = 0; j < iters; j++) {
716 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
718 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
720 createProcesses(thisTree, namesOfGroupCombos, rScores);
723 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
726 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
729 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
733 //find the signifigance of the score for summary file
734 for (int f = 0; f < numComp; f++) {
736 sort(rScores[f].begin(), rScores[f].end());
738 //the index of the score higher than yours is returned
739 //so if you have 1000 random trees the index returned is 100
740 //then there are 900 trees with a score greater then you.
741 //giving you a signifigance of 0.900
742 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
744 //the signifigance is the number of trees with the users score or higher
745 WScoreSig.push_back((iters-index)/(float)iters);
748 //out << "Tree# " << i << endl;
749 calculateFreqsCumuls();
756 catch(exception& e) {
757 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
761 /**************************************************************************************************/
763 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
765 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
767 vector<int> processIDS;
771 //loop through and create all the processes you want
772 while (process != processors) {
776 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
779 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
781 //pass numSeqs to parent
783 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
784 m->openOutputFile(tempFile, out);
785 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
790 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
791 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
796 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
798 //force parent to wait until all the processes are done
799 for (int i=0;i<(processors-1);i++) {
800 int temp = processIDS[i];
804 //get data created by processes
805 for (int i=0;i<(processors-1);i++) {
808 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
809 m->openInputFile(s, in);
812 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
820 catch(exception& e) {
821 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
826 /**************************************************************************************************/
827 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
829 Tree* randT = new Tree(ct);
831 Weighted weighted(includeRoot);
833 for (int h = start; h < (start+num); h++) {
835 if (m->control_pressed) { return 0; }
837 //initialize weighted score
838 string groupA = namesOfGroupCombos[h][0];
839 string groupB = namesOfGroupCombos[h][1];
844 //create a random tree with same topology as T[i], but different labels
845 randT->assembleRandomUnifracTree(groupA, groupB);
847 if (m->control_pressed) { delete randT; return 0; }
849 //get wscore of random tree
850 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
852 if (m->control_pressed) { delete randT; return 0; }
855 scores[h].push_back(randomData[0]);
863 catch(exception& e) {
864 m->errorOut(e, "UnifracWeightedCommand", "driver");
868 /***********************************************************/
869 void UnifracWeightedCommand::printWeightedFile() {
873 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
875 for(int a = 0; a < numComp; a++) {
876 output->initFile(groupComb[a], tags);
878 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
879 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
880 output->output(data);
886 catch(exception& e) {
887 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
893 /***********************************************************/
894 void UnifracWeightedCommand::printWSummaryFile() {
897 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
898 m->mothurOut("Tree#\tGroups\tWScore\t");
899 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
900 outSum << endl; m->mothurOutEndLine();
903 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
907 for (int i = 0; i < T.size(); i++) {
908 for (int j = 0; j < numComp; j++) {
910 if (WScoreSig[count] > (1/(float)iters)) {
911 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
912 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
913 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
915 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
916 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
917 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
920 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
921 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
922 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
929 catch(exception& e) {
930 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
934 /***********************************************************/
935 void UnifracWeightedCommand::createPhylipFile() {
939 for (int i = 0; i < T.size(); i++) {
941 string phylipFileName;
942 if ((outputForm == "lt") || (outputForm == "square")) {
943 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
944 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
946 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
947 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
951 m->openOutputFile(phylipFileName, out);
953 if ((outputForm == "lt") || (outputForm == "square")) {
955 out << m->getNumGroups() << endl;
958 //make matrix with scores in it
959 vector< vector<float> > dists; dists.resize(m->getNumGroups());
960 for (int i = 0; i < m->getNumGroups(); i++) {
961 dists[i].resize(m->getNumGroups(), 0.0);
964 //flip it so you can print it
965 for (int r=0; r<m->getNumGroups(); r++) {
966 for (int l = 0; l < r; l++) {
967 dists[r][l] = utreeScores[count];
968 dists[l][r] = utreeScores[count];
974 for (int r=0; r<m->getNumGroups(); r++) {
976 string name = (m->getGroups())[r];
977 if (name.length() < 10) { //pad with spaces to make compatible
978 while (name.length() < 10) { name += " "; }
981 if (outputForm == "lt") {
985 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
987 }else if (outputForm == "square") {
991 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
995 for (int l = 0; l < r; l++) {
996 string otherName = (m->getGroups())[l];
997 if (otherName.length() < 10) { //pad with spaces to make compatible
998 while (otherName.length() < 10) { otherName += " "; }
1001 out << name << '\t' << otherName << dists[r][l] << endl;
1008 catch(exception& e) {
1009 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1013 /***********************************************************/
1014 int UnifracWeightedCommand::findIndex(float score, int index) {
1016 for (int i = 0; i < rScores[index].size(); i++) {
1017 if (rScores[index][i] >= score) { return i; }
1019 return rScores[index].size();
1021 catch(exception& e) {
1022 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1027 /***********************************************************/
1029 void UnifracWeightedCommand::calculateFreqsCumuls() {
1031 //clear out old tree values
1033 rScoreFreq.resize(numComp);
1035 rCumul.resize(numComp);
1036 validScores.clear();
1038 //calculate frequency
1039 for (int f = 0; f < numComp; f++) {
1040 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1041 validScores[rScores[f][i]] = rScores[f][i];
1042 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1043 if (it != rScoreFreq[f].end()) {
1044 rScoreFreq[f][rScores[f][i]]++;
1046 rScoreFreq[f][rScores[f][i]] = 1;
1052 for(int a = 0; a < numComp; a++) {
1053 float rcumul = 1.0000;
1054 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1055 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1056 //make rscoreFreq map and rCumul
1057 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1058 rCumul[a][it->first] = rcumul;
1059 //get percentage of random trees with that info
1060 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1061 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1066 catch(exception& e) {
1067 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1071 /***********************************************************/