2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 UnifracWeightedCommand::UnifracWeightedCommand(){
69 abort = true; calledHelp = true;
71 vector<string> tempOutNames;
72 outputTypes["weighted"] = tempOutNames;
73 outputTypes["wsummary"] = tempOutNames;
74 outputTypes["phylip"] = tempOutNames;
75 outputTypes["column"] = tempOutNames;
76 outputTypes["tree"] = tempOutNames;
79 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
84 /***********************************************************/
85 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
87 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters=parser.getParameters();
98 map<string,string>::iterator it;
100 ValidParameters validParameter;
102 //check to make sure all parameters are valid for command
103 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["weighted"] = tempOutNames;
110 outputTypes["wsummary"] = tempOutNames;
111 outputTypes["phylip"] = tempOutNames;
112 outputTypes["column"] = tempOutNames;
113 outputTypes["tree"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
120 it = parameters.find("tree");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
145 //check for required parameters
146 treefile = validParameter.validFile(parameters, "tree", true);
147 if (treefile == "not open") { treefile = ""; abort = true; }
148 else if (treefile == "not found") { //if there is a current design file, use it
149 treefile = m->getTreeFile();
150 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
151 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
152 }else { m->setTreeFile(treefile); }
154 //check for required parameters
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
158 else { m->setGroupFile(groupfile); }
160 namefile = validParameter.validFile(parameters, "name", true);
161 if (namefile == "not open") { namefile = ""; abort = true; }
162 else if (namefile == "not found") { namefile = ""; }
163 else { m->setNameFile(namefile); }
165 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
168 //check for optional parameter and set defaults
169 // ...at some point should added some additional type checking...
170 groups = validParameter.validFile(parameters, "groups", false);
171 if (groups == "not found") { groups = ""; }
173 m->splitAtDash(groups, Groups);
174 m->setGroups(Groups);
177 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
178 m->mothurConvert(itersString, iters);
180 string temp = validParameter.validFile(parameters, "distance", false);
181 if (temp == "not found") { phylip = false; outputForm = ""; }
183 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
184 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
187 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
188 random = m->isTrue(temp);
190 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
191 includeRoot = m->isTrue(temp);
193 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
194 m->setProcessors(temp);
195 m->mothurConvert(temp, processors);
197 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
198 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
200 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
201 else { subsample = false; }
204 if (!subsample) { subsampleIters = 0; }
205 else { subsampleIters = iters; }
207 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
208 consensus = m->isTrue(temp);
210 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
211 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
212 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
213 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
215 if (namefile == "") {
216 vector<string> files; files.push_back(treefile);
217 parser.getNameFile(files);
223 catch(exception& e) {
224 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
228 /***********************************************************/
229 int UnifracWeightedCommand::execute() {
232 if (abort == true) { if (calledHelp) { return 0; } return 2; }
234 m->setTreeFile(treefile);
236 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
237 T = reader->getTrees();
238 tmap = T[0]->getTreeMap();
239 map<string, string> nameMap = reader->getNames();
242 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
244 sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary";
245 m->openOutputFile(sumFile, outSum);
246 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
249 string s; //to make work with setgroups
250 Groups = m->getGroups();
251 vector<string> nameGroups = tmap->getNamesOfGroups();
252 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
253 m->setGroups(Groups);
255 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
257 Weighted weighted(includeRoot);
259 int start = time(NULL);
263 //user has not set size, set size = smallest samples size
264 if (subsampleSize == -1) {
265 vector<string> temp; temp.push_back(Groups[0]);
266 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
267 for (int i = 1; i < Groups.size(); i++) {
268 temp.clear(); temp.push_back(Groups[i]);
269 int thisSize = (tmap->getNamesSeqs(temp)).size();
270 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
272 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
273 }else { //eliminate any too small groups
274 vector<string> newGroups = Groups;
276 for (int i = 0; i < newGroups.size(); i++) {
277 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
278 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
279 int thisSize = thisGroupsSeqs.size();
281 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
282 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
284 m->setGroups(Groups);
288 //here in case some groups are removed by subsample
289 util.getCombos(groupComb, Groups, numComp);
291 if (numComp < processors) { processors = numComp; }
293 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
295 //get weighted scores for users trees
296 for (int i = 0; i < T.size(); i++) {
298 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
301 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
302 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
304 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
305 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
308 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString);
309 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
310 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted");
313 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
314 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
317 for (int s=0; s<numComp; s++) {
318 //add users score to vector of user scores
319 uScores[s].push_back(userData[s]);
320 //save users tree score for summary file
321 utreeScores.push_back(userData[s]);
324 if (random) { runRandomCalcs(T[i], userData); }
332 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
333 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
335 if (m->control_pressed) { break; }
337 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
338 TreeMap* newTmap = new TreeMap();
339 newTmap->getCopy(*tmap);
342 Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
344 //call new weighted function
345 vector<double> iterData; iterData.resize(numComp,0);
346 Weighted thisWeighted(includeRoot);
347 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
349 //save data to make ave dist, std dist
350 calcDistsTotals.push_back(iterData);
353 delete subSampleTree;
355 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
358 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
360 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
361 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
365 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
367 if (phylip) { createPhylipFile(); }
371 //clear out users groups
374 for (int i = 0; i < T.size(); i++) { delete T[i]; }
376 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
378 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
380 //set phylip file as new current phylipfile
382 itTypes = outputTypes.find("phylip");
383 if (itTypes != outputTypes.end()) {
384 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
387 //set column file as new current columnfile
388 itTypes = outputTypes.find("column");
389 if (itTypes != outputTypes.end()) {
390 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
393 m->mothurOutEndLine();
394 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
395 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
396 m->mothurOutEndLine();
401 catch(exception& e) {
402 m->errorOut(e, "UnifracWeightedCommand", "execute");
406 /**************************************************************************************************/
407 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
409 //we need to find the average distance and standard deviation for each groups distance
412 vector<double> averages; averages.resize(numComp, 0);
413 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
414 for (int i = 0; i < dists[thisIter].size(); i++) {
415 averages[i] += dists[thisIter][i];
420 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
422 //find standard deviation
423 vector<double> stdDev; stdDev.resize(numComp, 0);
425 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
426 for (int j = 0; j < dists[thisIter].size(); j++) {
427 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
430 for (int i = 0; i < stdDev.size(); i++) {
431 stdDev[i] /= (float) subsampleIters;
432 stdDev[i] = sqrt(stdDev[i]);
435 //make matrix with scores in it
436 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
437 for (int i = 0; i < m->getNumGroups(); i++) {
438 avedists[i].resize(m->getNumGroups(), 0.0);
441 //make matrix with scores in it
442 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
443 for (int i = 0; i < m->getNumGroups(); i++) {
444 stddists[i].resize(m->getNumGroups(), 0.0);
447 //flip it so you can print it
449 for (int r=0; r<m->getNumGroups(); r++) {
450 for (int l = 0; l < r; l++) {
451 avedists[r][l] = averages[count];
452 avedists[l][r] = averages[count];
453 stddists[r][l] = stdDev[count];
454 stddists[l][r] = stdDev[count];
459 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave.dist";
460 outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName);
463 m->openOutputFile(aveFileName, out);
465 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std.dist";
466 outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName);
469 m->openOutputFile(stdFileName, outStd);
471 if ((outputForm == "lt") || (outputForm == "square")) {
473 out << m->getNumGroups() << endl;
474 outStd << m->getNumGroups() << endl;
478 for (int r=0; r<m->getNumGroups(); r++) {
480 string name = (m->getGroups())[r];
481 if (name.length() < 10) { //pad with spaces to make compatible
482 while (name.length() < 10) { name += " "; }
485 if (outputForm == "lt") {
487 outStd << name << '\t';
490 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
491 out << endl; outStd << endl;
492 }else if (outputForm == "square") {
494 outStd << name << '\t';
497 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
498 out << endl; outStd << endl;
501 for (int l = 0; l < r; l++) {
502 string otherName = (m->getGroups())[l];
503 if (otherName.length() < 10) { //pad with spaces to make compatible
504 while (otherName.length() < 10) { otherName += " "; }
507 out << name << '\t' << otherName << avedists[r][l] << endl;
508 outStd << name << '\t' << otherName << stddists[r][l] << endl;
517 catch(exception& e) {
518 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
523 /**************************************************************************************************/
524 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
527 //used in tree constructor
530 //create treemap class from groupmap for tree class to use
532 newTmap.makeSim(m->getGroups());
534 //clear old tree names if any
535 m->Treenames.clear();
537 //fills globaldatas tree names
538 m->Treenames = m->getGroups();
540 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
542 if (m->control_pressed) { return 0; }
545 Tree* conTree = con.getTree(newTrees);
547 //create a new filename
548 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre";
549 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
551 m->openOutputFile(conFile, outTree);
553 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
559 catch(exception& e) {
560 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
564 /**************************************************************************************************/
566 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
571 //create a new filename
572 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre";
573 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
576 m->openOutputFile(outputFile, outAll);
579 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
581 if (m->control_pressed) { break; }
583 //make matrix with scores in it
584 vector< vector<double> > sims; sims.resize(m->getNumGroups());
585 for (int j = 0; j < m->getNumGroups(); j++) {
586 sims[j].resize(m->getNumGroups(), 0.0);
590 for (int r=0; r<m->getNumGroups(); r++) {
591 for (int l = 0; l < r; l++) {
592 double sim = -(dists[i][count]-1.0);
600 Tree* tempTree = new Tree(&mytmap, sims);
601 map<string, string> empty;
602 tempTree->assembleTree(empty);
604 trees.push_back(tempTree);
607 tempTree->print(outAll);
612 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
616 catch(exception& e) {
617 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
621 /**************************************************************************************************/
623 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
626 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
627 vector< vector<string> > namesOfGroupCombos;
628 for (int a=0; a<numGroups; a++) {
629 for (int l = 0; l < a; l++) {
630 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
631 namesOfGroupCombos.push_back(groups);
637 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
639 int numPairs = namesOfGroupCombos.size();
640 int numPairsPerProcessor = numPairs / processors;
642 for (int i = 0; i < processors; i++) {
643 int startPos = i * numPairsPerProcessor;
644 if(i == processors - 1){
645 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
647 lines.push_back(linePair(startPos, numPairsPerProcessor));
653 //get scores for random trees
654 for (int j = 0; j < iters; j++) {
656 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
658 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
660 createProcesses(thisTree, namesOfGroupCombos, rScores);
663 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
666 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
669 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
673 //find the signifigance of the score for summary file
674 for (int f = 0; f < numComp; f++) {
676 sort(rScores[f].begin(), rScores[f].end());
678 //the index of the score higher than yours is returned
679 //so if you have 1000 random trees the index returned is 100
680 //then there are 900 trees with a score greater then you.
681 //giving you a signifigance of 0.900
682 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
684 //the signifigance is the number of trees with the users score or higher
685 WScoreSig.push_back((iters-index)/(float)iters);
688 //out << "Tree# " << i << endl;
689 calculateFreqsCumuls();
696 catch(exception& e) {
697 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
701 /**************************************************************************************************/
703 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
705 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
707 vector<int> processIDS;
711 //loop through and create all the processes you want
712 while (process != processors) {
716 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
719 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
721 //pass numSeqs to parent
723 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
724 m->openOutputFile(tempFile, out);
725 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
730 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
731 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
736 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
738 //force parent to wait until all the processes are done
739 for (int i=0;i<(processors-1);i++) {
740 int temp = processIDS[i];
744 //get data created by processes
745 for (int i=0;i<(processors-1);i++) {
748 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
749 m->openInputFile(s, in);
752 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
760 catch(exception& e) {
761 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
766 /**************************************************************************************************/
767 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
769 Tree* randT = new Tree(tmap);
771 Weighted weighted(includeRoot);
773 for (int h = start; h < (start+num); h++) {
775 if (m->control_pressed) { return 0; }
777 //initialize weighted score
778 string groupA = namesOfGroupCombos[h][0];
779 string groupB = namesOfGroupCombos[h][1];
784 //create a random tree with same topology as T[i], but different labels
785 randT->assembleRandomUnifracTree(groupA, groupB);
787 if (m->control_pressed) { delete randT; return 0; }
789 //get wscore of random tree
790 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
792 if (m->control_pressed) { delete randT; return 0; }
795 scores[h].push_back(randomData[0]);
803 catch(exception& e) {
804 m->errorOut(e, "UnifracWeightedCommand", "driver");
808 /***********************************************************/
809 void UnifracWeightedCommand::printWeightedFile() {
813 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
815 for(int a = 0; a < numComp; a++) {
816 output->initFile(groupComb[a], tags);
818 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
819 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
820 output->output(data);
826 catch(exception& e) {
827 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
833 /***********************************************************/
834 void UnifracWeightedCommand::printWSummaryFile() {
837 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
838 m->mothurOut("Tree#\tGroups\tWScore\t");
839 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
840 outSum << endl; m->mothurOutEndLine();
843 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
847 for (int i = 0; i < T.size(); i++) {
848 for (int j = 0; j < numComp; j++) {
850 if (WScoreSig[count] > (1/(float)iters)) {
851 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
852 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
853 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
855 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
856 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
857 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
860 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
861 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
862 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
869 catch(exception& e) {
870 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
874 /***********************************************************/
875 void UnifracWeightedCommand::createPhylipFile() {
879 for (int i = 0; i < T.size(); i++) {
881 string phylipFileName;
882 if ((outputForm == "lt") || (outputForm == "square")) {
883 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist";
884 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
886 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist";
887 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
891 m->openOutputFile(phylipFileName, out);
893 if ((outputForm == "lt") || (outputForm == "square")) {
895 out << m->getNumGroups() << endl;
898 //make matrix with scores in it
899 vector< vector<float> > dists; dists.resize(m->getNumGroups());
900 for (int i = 0; i < m->getNumGroups(); i++) {
901 dists[i].resize(m->getNumGroups(), 0.0);
904 //flip it so you can print it
905 for (int r=0; r<m->getNumGroups(); r++) {
906 for (int l = 0; l < r; l++) {
907 dists[r][l] = utreeScores[count];
908 dists[l][r] = utreeScores[count];
914 for (int r=0; r<m->getNumGroups(); r++) {
916 string name = (m->getGroups())[r];
917 if (name.length() < 10) { //pad with spaces to make compatible
918 while (name.length() < 10) { name += " "; }
921 if (outputForm == "lt") {
925 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
927 }else if (outputForm == "square") {
931 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
935 for (int l = 0; l < r; l++) {
936 string otherName = (m->getGroups())[l];
937 if (otherName.length() < 10) { //pad with spaces to make compatible
938 while (otherName.length() < 10) { otherName += " "; }
941 out << name << '\t' << otherName << dists[r][l] << endl;
948 catch(exception& e) {
949 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
953 /***********************************************************/
954 int UnifracWeightedCommand::findIndex(float score, int index) {
956 for (int i = 0; i < rScores[index].size(); i++) {
957 if (rScores[index][i] >= score) { return i; }
959 return rScores[index].size();
961 catch(exception& e) {
962 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
967 /***********************************************************/
969 void UnifracWeightedCommand::calculateFreqsCumuls() {
971 //clear out old tree values
973 rScoreFreq.resize(numComp);
975 rCumul.resize(numComp);
978 //calculate frequency
979 for (int f = 0; f < numComp; f++) {
980 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
981 validScores[rScores[f][i]] = rScores[f][i];
982 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
983 if (it != rScoreFreq[f].end()) {
984 rScoreFreq[f][rScores[f][i]]++;
986 rScoreFreq[f][rScores[f][i]] = 1;
992 for(int a = 0; a < numComp; a++) {
993 float rcumul = 1.0000;
994 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
995 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
996 //make rscoreFreq map and rCumul
997 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
998 rCumul[a][it->first] = rcumul;
999 //get percentage of random trees with that info
1000 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1001 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1006 catch(exception& e) {
1007 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1011 /***********************************************************/