2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 UnifracWeightedCommand::UnifracWeightedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["weighted"] = tempOutNames;
73 outputTypes["wsummary"] = tempOutNames;
74 outputTypes["phylip"] = tempOutNames;
75 outputTypes["column"] = tempOutNames;
76 outputTypes["tree"] = tempOutNames;
79 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
84 /***********************************************************/
85 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
87 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters=parser.getParameters();
98 map<string,string>::iterator it;
100 ValidParameters validParameter;
102 //check to make sure all parameters are valid for command
103 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["weighted"] = tempOutNames;
110 outputTypes["wsummary"] = tempOutNames;
111 outputTypes["phylip"] = tempOutNames;
112 outputTypes["column"] = tempOutNames;
113 outputTypes["tree"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("tree");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { treefile = ""; abort = true; }
148 else if (treefile == "not found") { //if there is a current design file, use it
149 treefile = m->getTreeFile();
150 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setTreeFile(treefile); }
154 //check for required parameters
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { m->setGroupFile(groupfile); }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { namefile = ""; abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
163 else { m->setNameFile(namefile); }
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
168 //check for optional parameter and set defaults
169 // ...at some point should added some additional type checking...
170 groups = validParameter.validFile(parameters, "groups", false);
171 if (groups == "not found") { groups = ""; }
173 m->splitAtDash(groups, Groups);
174 m->setGroups(Groups);
177 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
178 m->mothurConvert(itersString, iters);
180 string temp = validParameter.validFile(parameters, "distance", false);
181 if (temp == "not found") { phylip = false; outputForm = ""; }
183 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
184 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
187 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
188 random = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
191 includeRoot = m->isTrue(temp);
193 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
194 m->setProcessors(temp);
195 m->mothurConvert(temp, processors);
197 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
198 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
200 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
201 else { subsample = false; }
204 if (!subsample) { subsampleIters = 0; }
205 else { subsampleIters = iters; }
207 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
208 consensus = m->isTrue(temp);
210 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
211 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
212 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
213 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
215 if (namefile == "") {
216 vector<string> files; files.push_back(treefile);
217 parser.getNameFile(files);
223 catch(exception& e) {
224 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
228 /***********************************************************/
229 int UnifracWeightedCommand::execute() {
232 if (abort == true) { if (calledHelp) { return 0; } return 2; }
234 m->setTreeFile(treefile);
236 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
237 T = reader->getTrees();
238 tmap = T[0]->getTreeMap();
239 map<string, string> nameMap = reader->getNames();
240 map<string, string> unique2Dup = reader->getNameMap();
243 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
245 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
246 m->openOutputFile(sumFile, outSum);
247 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
250 string s; //to make work with setgroups
251 Groups = m->getGroups();
252 vector<string> nameGroups = tmap->getNamesOfGroups();
253 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
254 m->setGroups(Groups);
256 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
258 Weighted weighted(includeRoot);
260 int start = time(NULL);
264 //user has not set size, set size = smallest samples size
265 if (subsampleSize == -1) {
266 vector<string> temp; temp.push_back(Groups[0]);
267 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
268 for (int i = 1; i < Groups.size(); i++) {
269 temp.clear(); temp.push_back(Groups[i]);
270 int thisSize = (tmap->getNamesSeqs(temp)).size();
271 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
273 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
274 }else { //eliminate any too small groups
275 vector<string> newGroups = Groups;
277 for (int i = 0; i < newGroups.size(); i++) {
278 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
279 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
280 int thisSize = thisGroupsSeqs.size();
282 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
283 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
285 m->setGroups(Groups);
289 //here in case some groups are removed by subsample
290 util.getCombos(groupComb, Groups, numComp);
292 if (numComp < processors) { processors = numComp; }
294 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
296 //get weighted scores for users trees
297 for (int i = 0; i < T.size(); i++) {
299 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
302 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
303 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
305 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
306 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
309 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
310 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
311 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
314 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
315 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
318 for (int s=0; s<numComp; s++) {
319 //add users score to vector of user scores
320 uScores[s].push_back(userData[s]);
321 //save users tree score for summary file
322 utreeScores.push_back(userData[s]);
325 if (random) { runRandomCalcs(T[i], userData); }
333 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
334 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
336 if (m->control_pressed) { break; }
338 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
339 TreeMap* newTmap = new TreeMap();
340 //newTmap->getCopy(*tmap);
343 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
345 //uses method of setting groups to doNotIncludeMe
347 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
349 //call new weighted function
350 vector<double> iterData; iterData.resize(numComp,0);
351 Weighted thisWeighted(includeRoot);
352 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
354 //save data to make ave dist, std dist
355 calcDistsTotals.push_back(iterData);
358 delete subSampleTree;
360 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
363 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
365 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
366 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
370 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
372 if (phylip) { createPhylipFile(); }
376 //clear out users groups
379 for (int i = 0; i < T.size(); i++) { delete T[i]; }
381 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
383 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
385 //set phylip file as new current phylipfile
387 itTypes = outputTypes.find("phylip");
388 if (itTypes != outputTypes.end()) {
389 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
392 //set column file as new current columnfile
393 itTypes = outputTypes.find("column");
394 if (itTypes != outputTypes.end()) {
395 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
398 m->mothurOutEndLine();
399 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
400 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
401 m->mothurOutEndLine();
406 catch(exception& e) {
407 m->errorOut(e, "UnifracWeightedCommand", "execute");
411 /**************************************************************************************************/
412 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
414 //we need to find the average distance and standard deviation for each groups distance
417 vector<double> averages; averages.resize(numComp, 0);
418 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
419 for (int i = 0; i < dists[thisIter].size(); i++) {
420 averages[i] += dists[thisIter][i];
425 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
427 //find standard deviation
428 vector<double> stdDev; stdDev.resize(numComp, 0);
430 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
431 for (int j = 0; j < dists[thisIter].size(); j++) {
432 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
435 for (int i = 0; i < stdDev.size(); i++) {
436 stdDev[i] /= (float) subsampleIters;
437 stdDev[i] = sqrt(stdDev[i]);
440 //make matrix with scores in it
441 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
442 for (int i = 0; i < m->getNumGroups(); i++) {
443 avedists[i].resize(m->getNumGroups(), 0.0);
446 //make matrix with scores in it
447 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
448 for (int i = 0; i < m->getNumGroups(); i++) {
449 stddists[i].resize(m->getNumGroups(), 0.0);
452 //flip it so you can print it
454 for (int r=0; r<m->getNumGroups(); r++) {
455 for (int l = 0; l < r; l++) {
456 avedists[r][l] = averages[count];
457 avedists[l][r] = averages[count];
458 stddists[r][l] = stdDev[count];
459 stddists[l][r] = stdDev[count];
464 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave.dist";
465 outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
468 m->openOutputFile(aveFileName, out);
470 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std.dist";
471 outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
474 m->openOutputFile(stdFileName, outStd);
476 if ((outputForm == "lt") || (outputForm == "square")) {
478 out << m->getNumGroups() << endl;
479 outStd << m->getNumGroups() << endl;
483 for (int r=0; r<m->getNumGroups(); r++) {
485 string name = (m->getGroups())[r];
486 if (name.length() < 10) { //pad with spaces to make compatible
487 while (name.length() < 10) { name += " "; }
490 if (outputForm == "lt") {
492 outStd << name << '\t';
495 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
496 out << endl; outStd << endl;
497 }else if (outputForm == "square") {
499 outStd << name << '\t';
502 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
503 out << endl; outStd << endl;
506 for (int l = 0; l < r; l++) {
507 string otherName = (m->getGroups())[l];
508 if (otherName.length() < 10) { //pad with spaces to make compatible
509 while (otherName.length() < 10) { otherName += " "; }
512 out << name << '\t' << otherName << avedists[r][l] << endl;
513 outStd << name << '\t' << otherName << stddists[r][l] << endl;
522 catch(exception& e) {
523 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
528 /**************************************************************************************************/
529 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
532 //used in tree constructor
535 //create treemap class from groupmap for tree class to use
537 newTmap.makeSim(m->getGroups());
539 //clear old tree names if any
540 m->Treenames.clear();
542 //fills globaldatas tree names
543 m->Treenames = m->getGroups();
545 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
547 if (m->control_pressed) { return 0; }
550 Tree* conTree = con.getTree(newTrees);
552 //create a new filename
553 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";
554 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
556 m->openOutputFile(conFile, outTree);
558 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
564 catch(exception& e) {
565 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
569 /**************************************************************************************************/
571 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
576 //create a new filename
577 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";
578 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
581 m->openOutputFile(outputFile, outAll);
584 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
586 if (m->control_pressed) { break; }
588 //make matrix with scores in it
589 vector< vector<double> > sims; sims.resize(m->getNumGroups());
590 for (int j = 0; j < m->getNumGroups(); j++) {
591 sims[j].resize(m->getNumGroups(), 0.0);
595 for (int r=0; r<m->getNumGroups(); r++) {
596 for (int l = 0; l < r; l++) {
597 double sim = -(dists[i][count]-1.0);
605 Tree* tempTree = new Tree(&mytmap, sims);
606 map<string, string> empty;
607 tempTree->assembleTree(empty);
609 trees.push_back(tempTree);
612 tempTree->print(outAll);
617 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
621 catch(exception& e) {
622 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
626 /**************************************************************************************************/
628 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
631 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
632 vector< vector<string> > namesOfGroupCombos;
633 for (int a=0; a<numGroups; a++) {
634 for (int l = 0; l < a; l++) {
635 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
636 namesOfGroupCombos.push_back(groups);
642 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
644 int numPairs = namesOfGroupCombos.size();
645 int numPairsPerProcessor = numPairs / processors;
647 for (int i = 0; i < processors; i++) {
648 int startPos = i * numPairsPerProcessor;
649 if(i == processors - 1){
650 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
652 lines.push_back(linePair(startPos, numPairsPerProcessor));
658 //get scores for random trees
659 for (int j = 0; j < iters; j++) {
661 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
663 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
665 createProcesses(thisTree, namesOfGroupCombos, rScores);
668 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
671 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
674 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
678 //find the signifigance of the score for summary file
679 for (int f = 0; f < numComp; f++) {
681 sort(rScores[f].begin(), rScores[f].end());
683 //the index of the score higher than yours is returned
684 //so if you have 1000 random trees the index returned is 100
685 //then there are 900 trees with a score greater then you.
686 //giving you a signifigance of 0.900
687 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
689 //the signifigance is the number of trees with the users score or higher
690 WScoreSig.push_back((iters-index)/(float)iters);
693 //out << "Tree# " << i << endl;
694 calculateFreqsCumuls();
701 catch(exception& e) {
702 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
706 /**************************************************************************************************/
708 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
710 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
712 vector<int> processIDS;
716 //loop through and create all the processes you want
717 while (process != processors) {
721 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
724 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
726 //pass numSeqs to parent
728 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
729 m->openOutputFile(tempFile, out);
730 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
735 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
736 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
741 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
743 //force parent to wait until all the processes are done
744 for (int i=0;i<(processors-1);i++) {
745 int temp = processIDS[i];
749 //get data created by processes
750 for (int i=0;i<(processors-1);i++) {
753 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
754 m->openInputFile(s, in);
757 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
765 catch(exception& e) {
766 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
771 /**************************************************************************************************/
772 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
774 Tree* randT = new Tree(tmap);
776 Weighted weighted(includeRoot);
778 for (int h = start; h < (start+num); h++) {
780 if (m->control_pressed) { return 0; }
782 //initialize weighted score
783 string groupA = namesOfGroupCombos[h][0];
784 string groupB = namesOfGroupCombos[h][1];
789 //create a random tree with same topology as T[i], but different labels
790 randT->assembleRandomUnifracTree(groupA, groupB);
792 if (m->control_pressed) { delete randT; return 0; }
794 //get wscore of random tree
795 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
797 if (m->control_pressed) { delete randT; return 0; }
800 scores[h].push_back(randomData[0]);
808 catch(exception& e) {
809 m->errorOut(e, "UnifracWeightedCommand", "driver");
813 /***********************************************************/
814 void UnifracWeightedCommand::printWeightedFile() {
818 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
820 for(int a = 0; a < numComp; a++) {
821 output->initFile(groupComb[a], tags);
823 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
824 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
825 output->output(data);
831 catch(exception& e) {
832 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
838 /***********************************************************/
839 void UnifracWeightedCommand::printWSummaryFile() {
842 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
843 m->mothurOut("Tree#\tGroups\tWScore\t");
844 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
845 outSum << endl; m->mothurOutEndLine();
848 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
852 for (int i = 0; i < T.size(); i++) {
853 for (int j = 0; j < numComp; j++) {
855 if (WScoreSig[count] > (1/(float)iters)) {
856 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
857 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
858 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
860 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
861 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
862 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
865 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
866 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
867 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
874 catch(exception& e) {
875 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
879 /***********************************************************/
880 void UnifracWeightedCommand::createPhylipFile() {
884 for (int i = 0; i < T.size(); i++) {
886 string phylipFileName;
887 if ((outputForm == "lt") || (outputForm == "square")) {
888 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
889 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
891 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
892 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
896 m->openOutputFile(phylipFileName, out);
898 if ((outputForm == "lt") || (outputForm == "square")) {
900 out << m->getNumGroups() << endl;
903 //make matrix with scores in it
904 vector< vector<float> > dists; dists.resize(m->getNumGroups());
905 for (int i = 0; i < m->getNumGroups(); i++) {
906 dists[i].resize(m->getNumGroups(), 0.0);
909 //flip it so you can print it
910 for (int r=0; r<m->getNumGroups(); r++) {
911 for (int l = 0; l < r; l++) {
912 dists[r][l] = utreeScores[count];
913 dists[l][r] = utreeScores[count];
919 for (int r=0; r<m->getNumGroups(); r++) {
921 string name = (m->getGroups())[r];
922 if (name.length() < 10) { //pad with spaces to make compatible
923 while (name.length() < 10) { name += " "; }
926 if (outputForm == "lt") {
930 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
932 }else if (outputForm == "square") {
936 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
940 for (int l = 0; l < r; l++) {
941 string otherName = (m->getGroups())[l];
942 if (otherName.length() < 10) { //pad with spaces to make compatible
943 while (otherName.length() < 10) { otherName += " "; }
946 out << name << '\t' << otherName << dists[r][l] << endl;
953 catch(exception& e) {
954 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
958 /***********************************************************/
959 int UnifracWeightedCommand::findIndex(float score, int index) {
961 for (int i = 0; i < rScores[index].size(); i++) {
962 if (rScores[index][i] >= score) { return i; }
964 return rScores[index].size();
966 catch(exception& e) {
967 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
972 /***********************************************************/
974 void UnifracWeightedCommand::calculateFreqsCumuls() {
976 //clear out old tree values
978 rScoreFreq.resize(numComp);
980 rCumul.resize(numComp);
983 //calculate frequency
984 for (int f = 0; f < numComp; f++) {
985 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
986 validScores[rScores[f][i]] = rScores[f][i];
987 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
988 if (it != rScoreFreq[f].end()) {
989 rScoreFreq[f][rScores[f][i]]++;
991 rScoreFreq[f][rScores[f][i]] = 1;
997 for(int a = 0; a < numComp; a++) {
998 float rcumul = 1.0000;
999 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1000 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1001 //make rscoreFreq map and rCumul
1002 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1003 rCumul[a][it->first] = rcumul;
1004 //get percentage of random trees with that info
1005 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1006 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1011 catch(exception& e) {
1012 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1016 /***********************************************************/