2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
26 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
27 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
28 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){
70 string outputFileName = "";
71 map<string, vector<string> >::iterator it;
73 //is this a type this command creates
74 it = outputTypes.find(type);
75 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
77 if (type == "weighted") { outputFileName = "weighted"; }
78 else if (type == "wsummary") { outputFileName = "wsummary"; }
79 else if (type == "phylip") { outputFileName = "dist"; }
80 else if (type == "column") { outputFileName = "dist"; }
81 else if (type == "tree") { outputFileName = "tre"; }
82 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
84 return outputFileName;
87 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
91 //**********************************************************************************************************************
92 UnifracWeightedCommand::UnifracWeightedCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["weighted"] = tempOutNames;
98 outputTypes["wsummary"] = tempOutNames;
99 outputTypes["phylip"] = tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["tree"] = tempOutNames;
103 catch(exception& e) {
104 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
109 /***********************************************************/
110 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
112 abort = false; calledHelp = false;
114 //allow user to run help
115 if(option == "help") { help(); abort = true; calledHelp = true; }
116 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119 vector<string> myArray = setParameters();
121 OptionParser parser(option);
122 map<string,string> parameters=parser.getParameters();
123 map<string,string>::iterator it;
125 ValidParameters validParameter;
127 //check to make sure all parameters are valid for command
128 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
132 //initialize outputTypes
133 vector<string> tempOutNames;
134 outputTypes["weighted"] = tempOutNames;
135 outputTypes["wsummary"] = tempOutNames;
136 outputTypes["phylip"] = tempOutNames;
137 outputTypes["column"] = tempOutNames;
138 outputTypes["tree"] = tempOutNames;
140 //if the user changes the input directory command factory will send this info to us in the output parameter
141 string inputDir = validParameter.validFile(parameters, "inputdir", false);
142 if (inputDir == "not found"){ inputDir = ""; }
145 it = parameters.find("tree");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["tree"] = inputDir + it->second; }
153 it = parameters.find("group");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["group"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
169 it = parameters.find("count");
170 //user has given a template file
171 if(it != parameters.end()){
172 path = m->hasPath(it->second);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { parameters["count"] = inputDir + it->second; }
178 //check for required parameters
179 treefile = validParameter.validFile(parameters, "tree", true);
180 if (treefile == "not open") { treefile = ""; abort = true; }
181 else if (treefile == "not found") { //if there is a current design file, use it
182 treefile = m->getTreeFile();
183 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
184 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
185 }else { m->setTreeFile(treefile); }
187 //check for required parameters
188 groupfile = validParameter.validFile(parameters, "group", true);
189 if (groupfile == "not open") { abort = true; }
190 else if (groupfile == "not found") { groupfile = ""; }
191 else { m->setGroupFile(groupfile); }
193 namefile = validParameter.validFile(parameters, "name", true);
194 if (namefile == "not open") { namefile = ""; abort = true; }
195 else if (namefile == "not found") { namefile = ""; }
196 else { m->setNameFile(namefile); }
198 countfile = validParameter.validFile(parameters, "count", true);
199 if (countfile == "not open") { countfile = ""; abort = true; }
200 else if (countfile == "not found") { countfile = ""; }
201 else { m->setCountTableFile(countfile); }
203 if ((namefile != "") && (countfile != "")) {
204 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
207 if ((groupfile != "") && (countfile != "")) {
208 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
211 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
214 //check for optional parameter and set defaults
215 // ...at some point should added some additional type checking...
216 groups = validParameter.validFile(parameters, "groups", false);
217 if (groups == "not found") { groups = ""; }
219 m->splitAtDash(groups, Groups);
220 m->setGroups(Groups);
223 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
224 m->mothurConvert(itersString, iters);
226 string temp = validParameter.validFile(parameters, "distance", false);
227 if (temp == "not found") { phylip = false; outputForm = ""; }
229 if (temp=="phylip") { temp = "lt"; }
230 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
231 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
234 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
235 random = m->isTrue(temp);
237 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
238 includeRoot = m->isTrue(temp);
240 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
241 m->setProcessors(temp);
242 m->mothurConvert(temp, processors);
244 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
245 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
247 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
248 else { subsample = false; }
251 if (!subsample) { subsampleIters = 0; }
252 else { subsampleIters = iters; }
254 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
255 consensus = m->isTrue(temp);
257 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
258 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
261 if ((!testCt.testGroups(countfile)) && (subsample)) {
262 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
265 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
266 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
269 if (namefile == "") {
270 vector<string> files; files.push_back(treefile);
271 parser.getNameFile(files);
278 catch(exception& e) {
279 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
283 /***********************************************************/
284 int UnifracWeightedCommand::execute() {
287 if (abort == true) { if (calledHelp) { return 0; } return 2; }
289 m->setTreeFile(treefile);
292 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
293 else { reader = new TreeReader(treefile, countfile); }
294 T = reader->getTrees();
295 ct = T[0]->getCountTable();
298 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
300 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
301 m->openOutputFile(sumFile, outSum);
302 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
305 string s; //to make work with setgroups
306 Groups = m->getGroups();
307 vector<string> nameGroups = ct->getNamesOfGroups();
308 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
309 m->setGroups(Groups);
311 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
313 Weighted weighted(includeRoot);
315 int start = time(NULL);
319 //user has not set size, set size = smallest samples size
320 if (subsampleSize == -1) {
321 vector<string> temp; temp.push_back(Groups[0]);
322 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
323 for (int i = 1; i < Groups.size(); i++) {
324 int thisSize = ct->getGroupCount(Groups[i]);
325 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
327 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
328 }else { //eliminate any too small groups
329 vector<string> newGroups = Groups;
331 for (int i = 0; i < newGroups.size(); i++) {
332 int thisSize = ct->getGroupCount(newGroups[i]);
334 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
335 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
337 m->setGroups(Groups);
341 //here in case some groups are removed by subsample
342 util.getCombos(groupComb, Groups, numComp);
344 if (numComp < processors) { processors = numComp; }
346 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
348 //get weighted scores for users trees
349 for (int i = 0; i < T.size(); i++) {
351 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
354 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
355 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
357 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
358 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
361 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);
362 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
363 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
366 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
367 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
370 for (int s=0; s<numComp; s++) {
371 //add users score to vector of user scores
372 uScores[s].push_back(userData[s]);
373 //save users tree score for summary file
374 utreeScores.push_back(userData[s]);
377 if (random) { runRandomCalcs(T[i], userData); }
385 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
386 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
387 if (m->control_pressed) { break; }
389 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
390 CountTable* newCt = new CountTable();
393 if (m->debug) { sampleTime = time(NULL); }
395 //uses method of setting groups to doNotIncludeMe
397 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
399 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
401 //call new weighted function
402 vector<double> iterData; iterData.resize(numComp,0);
403 Weighted thisWeighted(includeRoot);
404 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
406 //save data to make ave dist, std dist
407 calcDistsTotals.push_back(iterData);
410 delete subSampleTree;
412 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
415 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
417 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
418 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
422 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
424 if (phylip) { createPhylipFile(); }
428 //clear out users groups
431 for (int i = 0; i < T.size(); i++) { delete T[i]; }
433 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
435 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
437 //set phylip file as new current phylipfile
439 itTypes = outputTypes.find("phylip");
440 if (itTypes != outputTypes.end()) {
441 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
444 //set column file as new current columnfile
445 itTypes = outputTypes.find("column");
446 if (itTypes != outputTypes.end()) {
447 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
450 m->mothurOutEndLine();
451 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
452 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
453 m->mothurOutEndLine();
458 catch(exception& e) {
459 m->errorOut(e, "UnifracWeightedCommand", "execute");
463 /**************************************************************************************************/
464 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
466 //we need to find the average distance and standard deviation for each groups distance
469 vector<double> averages; averages.resize(numComp, 0);
470 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
471 for (int i = 0; i < dists[thisIter].size(); i++) {
472 averages[i] += dists[thisIter][i];
477 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
479 //find standard deviation
480 vector<double> stdDev; stdDev.resize(numComp, 0);
482 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
483 for (int j = 0; j < dists[thisIter].size(); j++) {
484 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
487 for (int i = 0; i < stdDev.size(); i++) {
488 stdDev[i] /= (float) subsampleIters;
489 stdDev[i] = sqrt(stdDev[i]);
492 //make matrix with scores in it
493 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
494 for (int i = 0; i < m->getNumGroups(); i++) {
495 avedists[i].resize(m->getNumGroups(), 0.0);
498 //make matrix with scores in it
499 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
500 for (int i = 0; i < m->getNumGroups(); i++) {
501 stddists[i].resize(m->getNumGroups(), 0.0);
504 //flip it so you can print it
506 for (int r=0; r<m->getNumGroups(); r++) {
507 for (int l = 0; l < r; l++) {
508 avedists[r][l] = averages[count];
509 avedists[l][r] = averages[count];
510 stddists[r][l] = stdDev[count];
511 stddists[l][r] = stdDev[count];
516 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
517 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
518 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
520 m->openOutputFile(aveFileName, out);
522 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
523 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
524 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
526 m->openOutputFile(stdFileName, outStd);
528 if ((outputForm == "lt") || (outputForm == "square")) {
530 out << m->getNumGroups() << endl;
531 outStd << m->getNumGroups() << endl;
535 for (int r=0; r<m->getNumGroups(); r++) {
537 string name = (m->getGroups())[r];
538 if (name.length() < 10) { //pad with spaces to make compatible
539 while (name.length() < 10) { name += " "; }
542 if (outputForm == "lt") {
544 outStd << name << '\t';
547 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
548 out << endl; outStd << endl;
549 }else if (outputForm == "square") {
551 outStd << name << '\t';
554 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
555 out << endl; outStd << endl;
558 for (int l = 0; l < r; l++) {
559 string otherName = (m->getGroups())[l];
560 if (otherName.length() < 10) { //pad with spaces to make compatible
561 while (otherName.length() < 10) { otherName += " "; }
564 out << name << '\t' << otherName << avedists[r][l] << endl;
565 outStd << name << '\t' << otherName << stddists[r][l] << endl;
574 catch(exception& e) {
575 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
580 /**************************************************************************************************/
581 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
584 //used in tree constructor
587 ///create treemap class from groupmap for tree class to use
590 map<string, string> groupMap;
592 for (int i = 0; i < m->getGroups().size(); i++) {
593 nameMap.insert(m->getGroups()[i]);
594 gps.insert(m->getGroups()[i]);
595 groupMap[m->getGroups()[i]] = m->getGroups()[i];
597 newCt.createTable(nameMap, groupMap, gps);
599 //clear old tree names if any
600 m->Treenames.clear();
602 //fills globaldatas tree names
603 m->Treenames = m->getGroups();
605 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
607 if (m->control_pressed) { return 0; }
610 Tree* conTree = con.getTree(newTrees);
612 //create a new filename
613 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");
614 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
616 m->openOutputFile(conFile, outTree);
618 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
624 catch(exception& e) {
625 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
629 /**************************************************************************************************/
631 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
636 //create a new filename
637 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");
638 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
641 m->openOutputFile(outputFile, outAll);
644 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
646 if (m->control_pressed) { break; }
648 //make matrix with scores in it
649 vector< vector<double> > sims; sims.resize(m->getNumGroups());
650 for (int j = 0; j < m->getNumGroups(); j++) {
651 sims[j].resize(m->getNumGroups(), 0.0);
655 for (int r=0; r<m->getNumGroups(); r++) {
656 for (int l = 0; l < r; l++) {
657 double sim = -(dists[i][count]-1.0);
665 Tree* tempTree = new Tree(&myct, sims);
666 tempTree->assembleTree();
668 trees.push_back(tempTree);
671 tempTree->print(outAll);
676 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
680 catch(exception& e) {
681 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
685 /**************************************************************************************************/
687 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
690 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
691 vector< vector<string> > namesOfGroupCombos;
692 for (int a=0; a<numGroups; a++) {
693 for (int l = 0; l < a; l++) {
694 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
695 namesOfGroupCombos.push_back(groups);
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
703 int numPairs = namesOfGroupCombos.size();
704 int numPairsPerProcessor = numPairs / processors;
706 for (int i = 0; i < processors; i++) {
707 int startPos = i * numPairsPerProcessor;
708 if(i == processors - 1){
709 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
711 lines.push_back(linePair(startPos, numPairsPerProcessor));
717 //get scores for random trees
718 for (int j = 0; j < iters; j++) {
720 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
722 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
724 createProcesses(thisTree, namesOfGroupCombos, rScores);
727 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
730 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
733 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
737 //find the signifigance of the score for summary file
738 for (int f = 0; f < numComp; f++) {
740 sort(rScores[f].begin(), rScores[f].end());
742 //the index of the score higher than yours is returned
743 //so if you have 1000 random trees the index returned is 100
744 //then there are 900 trees with a score greater then you.
745 //giving you a signifigance of 0.900
746 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
748 //the signifigance is the number of trees with the users score or higher
749 WScoreSig.push_back((iters-index)/(float)iters);
752 //out << "Tree# " << i << endl;
753 calculateFreqsCumuls();
760 catch(exception& e) {
761 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
765 /**************************************************************************************************/
767 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
769 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
771 vector<int> processIDS;
775 //loop through and create all the processes you want
776 while (process != processors) {
780 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
783 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
785 //pass numSeqs to parent
787 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
788 m->openOutputFile(tempFile, out);
789 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
794 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
795 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
800 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
802 //force parent to wait until all the processes are done
803 for (int i=0;i<(processors-1);i++) {
804 int temp = processIDS[i];
808 //get data created by processes
809 for (int i=0;i<(processors-1);i++) {
812 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
813 m->openInputFile(s, in);
816 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
824 catch(exception& e) {
825 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
830 /**************************************************************************************************/
831 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
833 Tree* randT = new Tree(ct);
835 Weighted weighted(includeRoot);
837 for (int h = start; h < (start+num); h++) {
839 if (m->control_pressed) { return 0; }
841 //initialize weighted score
842 string groupA = namesOfGroupCombos[h][0];
843 string groupB = namesOfGroupCombos[h][1];
848 //create a random tree with same topology as T[i], but different labels
849 randT->assembleRandomUnifracTree(groupA, groupB);
851 if (m->control_pressed) { delete randT; return 0; }
853 //get wscore of random tree
854 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
856 if (m->control_pressed) { delete randT; return 0; }
859 scores[h].push_back(randomData[0]);
867 catch(exception& e) {
868 m->errorOut(e, "UnifracWeightedCommand", "driver");
872 /***********************************************************/
873 void UnifracWeightedCommand::printWeightedFile() {
877 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
879 for(int a = 0; a < numComp; a++) {
880 output->initFile(groupComb[a], tags);
882 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
883 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
884 output->output(data);
890 catch(exception& e) {
891 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
897 /***********************************************************/
898 void UnifracWeightedCommand::printWSummaryFile() {
901 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
902 m->mothurOut("Tree#\tGroups\tWScore\t");
903 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
904 outSum << endl; m->mothurOutEndLine();
907 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
911 for (int i = 0; i < T.size(); i++) {
912 for (int j = 0; j < numComp; j++) {
914 if (WScoreSig[count] > (1/(float)iters)) {
915 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
916 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
917 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
919 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
920 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
921 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
924 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
925 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
926 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
933 catch(exception& e) {
934 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
938 /***********************************************************/
939 void UnifracWeightedCommand::createPhylipFile() {
943 for (int i = 0; i < T.size(); i++) {
945 string phylipFileName;
946 if ((outputForm == "lt") || (outputForm == "square")) {
947 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
948 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
950 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
951 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
955 m->openOutputFile(phylipFileName, out);
957 if ((outputForm == "lt") || (outputForm == "square")) {
959 out << m->getNumGroups() << endl;
962 //make matrix with scores in it
963 vector< vector<float> > dists; dists.resize(m->getNumGroups());
964 for (int i = 0; i < m->getNumGroups(); i++) {
965 dists[i].resize(m->getNumGroups(), 0.0);
968 //flip it so you can print it
969 for (int r=0; r<m->getNumGroups(); r++) {
970 for (int l = 0; l < r; l++) {
971 dists[r][l] = utreeScores[count];
972 dists[l][r] = utreeScores[count];
978 for (int r=0; r<m->getNumGroups(); r++) {
980 string name = (m->getGroups())[r];
981 if (name.length() < 10) { //pad with spaces to make compatible
982 while (name.length() < 10) { name += " "; }
985 if (outputForm == "lt") {
989 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
991 }else if (outputForm == "square") {
995 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
999 for (int l = 0; l < r; l++) {
1000 string otherName = (m->getGroups())[l];
1001 if (otherName.length() < 10) { //pad with spaces to make compatible
1002 while (otherName.length() < 10) { otherName += " "; }
1005 out << name << '\t' << otherName << dists[r][l] << endl;
1012 catch(exception& e) {
1013 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1017 /***********************************************************/
1018 int UnifracWeightedCommand::findIndex(float score, int index) {
1020 for (int i = 0; i < rScores[index].size(); i++) {
1021 if (rScores[index][i] >= score) { return i; }
1023 return rScores[index].size();
1025 catch(exception& e) {
1026 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1031 /***********************************************************/
1033 void UnifracWeightedCommand::calculateFreqsCumuls() {
1035 //clear out old tree values
1037 rScoreFreq.resize(numComp);
1039 rCumul.resize(numComp);
1040 validScores.clear();
1042 //calculate frequency
1043 for (int f = 0; f < numComp; f++) {
1044 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1045 validScores[rScores[f][i]] = rScores[f][i];
1046 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1047 if (it != rScoreFreq[f].end()) {
1048 rScoreFreq[f][rScores[f][i]]++;
1050 rScoreFreq[f][rScores[f][i]] = 1;
1056 for(int a = 0; a < numComp; a++) {
1057 float rcumul = 1.0000;
1058 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1059 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1060 //make rscoreFreq map and rCumul
1061 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1062 rCumul[a][it->first] = rcumul;
1063 //get percentage of random trees with that info
1064 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1065 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1070 catch(exception& e) {
1071 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1075 /***********************************************************/