2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){
69 string outputFileName = "";
70 map<string, vector<string> >::iterator it;
72 //is this a type this command creates
73 it = outputTypes.find(type);
74 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76 if (type == "weighted") { outputFileName = "weighted"; }
77 else if (type == "wsummary") { outputFileName = "wsummary"; }
78 else if (type == "phylip") { outputFileName = "dist"; }
79 else if (type == "column") { outputFileName = "dist"; }
80 else if (type == "tree") { outputFileName = "tre"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
83 return outputFileName;
86 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
90 //**********************************************************************************************************************
91 UnifracWeightedCommand::UnifracWeightedCommand(){
93 abort = true; calledHelp = true;
95 vector<string> tempOutNames;
96 outputTypes["weighted"] = tempOutNames;
97 outputTypes["wsummary"] = tempOutNames;
98 outputTypes["phylip"] = tempOutNames;
99 outputTypes["column"] = tempOutNames;
100 outputTypes["tree"] = tempOutNames;
102 catch(exception& e) {
103 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
108 /***********************************************************/
109 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
111 abort = false; calledHelp = false;
113 //allow user to run help
114 if(option == "help") { help(); abort = true; calledHelp = true; }
115 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
118 vector<string> myArray = setParameters();
120 OptionParser parser(option);
121 map<string,string> parameters=parser.getParameters();
122 map<string,string>::iterator it;
124 ValidParameters validParameter;
126 //check to make sure all parameters are valid for command
127 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
128 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
131 //initialize outputTypes
132 vector<string> tempOutNames;
133 outputTypes["weighted"] = tempOutNames;
134 outputTypes["wsummary"] = tempOutNames;
135 outputTypes["phylip"] = tempOutNames;
136 outputTypes["column"] = tempOutNames;
137 outputTypes["tree"] = tempOutNames;
139 //if the user changes the input directory command factory will send this info to us in the output parameter
140 string inputDir = validParameter.validFile(parameters, "inputdir", false);
141 if (inputDir == "not found"){ inputDir = ""; }
144 it = parameters.find("tree");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["tree"] = inputDir + it->second; }
152 it = parameters.find("group");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["group"] = inputDir + it->second; }
160 it = parameters.find("name");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["name"] = inputDir + it->second; }
169 //check for required parameters
170 treefile = validParameter.validFile(parameters, "tree", true);
171 if (treefile == "not open") { treefile = ""; abort = true; }
172 else if (treefile == "not found") { //if there is a current design file, use it
173 treefile = m->getTreeFile();
174 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
175 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
176 }else { m->setTreeFile(treefile); }
178 //check for required parameters
179 groupfile = validParameter.validFile(parameters, "group", true);
180 if (groupfile == "not open") { abort = true; }
181 else if (groupfile == "not found") { groupfile = ""; }
182 else { m->setGroupFile(groupfile); }
184 namefile = validParameter.validFile(parameters, "name", true);
185 if (namefile == "not open") { namefile = ""; abort = true; }
186 else if (namefile == "not found") { namefile = ""; }
187 else { m->setNameFile(namefile); }
189 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
192 //check for optional parameter and set defaults
193 // ...at some point should added some additional type checking...
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 m->setGroups(Groups);
201 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
202 m->mothurConvert(itersString, iters);
204 string temp = validParameter.validFile(parameters, "distance", false);
205 if (temp == "not found") { phylip = false; outputForm = ""; }
207 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
208 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
211 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
212 random = m->isTrue(temp);
214 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
215 includeRoot = m->isTrue(temp);
217 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
218 m->setProcessors(temp);
219 m->mothurConvert(temp, processors);
221 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
222 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
224 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
225 else { subsample = false; }
228 if (!subsample) { subsampleIters = 0; }
229 else { subsampleIters = iters; }
231 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
232 consensus = m->isTrue(temp);
234 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
235 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
236 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
237 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
239 if (namefile == "") {
240 vector<string> files; files.push_back(treefile);
241 parser.getNameFile(files);
247 catch(exception& e) {
248 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
252 /***********************************************************/
253 int UnifracWeightedCommand::execute() {
256 if (abort == true) { if (calledHelp) { return 0; } return 2; }
258 m->setTreeFile(treefile);
260 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
261 T = reader->getTrees();
262 tmap = T[0]->getTreeMap();
263 map<string, string> nameMap = reader->getNames();
264 map<string, string> unique2Dup = reader->getNameMap();
267 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
269 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
270 m->openOutputFile(sumFile, outSum);
271 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
274 string s; //to make work with setgroups
275 Groups = m->getGroups();
276 vector<string> nameGroups = tmap->getNamesOfGroups();
277 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
278 m->setGroups(Groups);
280 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
282 Weighted weighted(includeRoot);
284 int start = time(NULL);
288 //user has not set size, set size = smallest samples size
289 if (subsampleSize == -1) {
290 vector<string> temp; temp.push_back(Groups[0]);
291 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
292 for (int i = 1; i < Groups.size(); i++) {
293 temp.clear(); temp.push_back(Groups[i]);
294 int thisSize = (tmap->getNamesSeqs(temp)).size();
295 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
297 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
298 }else { //eliminate any too small groups
299 vector<string> newGroups = Groups;
301 for (int i = 0; i < newGroups.size(); i++) {
302 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
303 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
304 int thisSize = thisGroupsSeqs.size();
306 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
307 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
309 m->setGroups(Groups);
313 //here in case some groups are removed by subsample
314 util.getCombos(groupComb, Groups, numComp);
316 if (numComp < processors) { processors = numComp; }
318 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
320 //get weighted scores for users trees
321 for (int i = 0; i < T.size(); i++) {
323 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
326 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
327 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
329 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
330 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
333 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);
334 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
335 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
338 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
339 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
342 for (int s=0; s<numComp; s++) {
343 //add users score to vector of user scores
344 uScores[s].push_back(userData[s]);
345 //save users tree score for summary file
346 utreeScores.push_back(userData[s]);
349 if (random) { runRandomCalcs(T[i], userData); }
357 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
358 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
360 if (m->control_pressed) { break; }
362 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
363 TreeMap* newTmap = new TreeMap();
364 //newTmap->getCopy(*tmap);
367 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
369 //uses method of setting groups to doNotIncludeMe
371 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
373 //call new weighted function
374 vector<double> iterData; iterData.resize(numComp,0);
375 Weighted thisWeighted(includeRoot);
376 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
378 //save data to make ave dist, std dist
379 calcDistsTotals.push_back(iterData);
382 delete subSampleTree;
384 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
387 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
389 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
390 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
394 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
396 if (phylip) { createPhylipFile(); }
400 //clear out users groups
403 for (int i = 0; i < T.size(); i++) { delete T[i]; }
405 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
407 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
409 //set phylip file as new current phylipfile
411 itTypes = outputTypes.find("phylip");
412 if (itTypes != outputTypes.end()) {
413 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
416 //set column file as new current columnfile
417 itTypes = outputTypes.find("column");
418 if (itTypes != outputTypes.end()) {
419 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
422 m->mothurOutEndLine();
423 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
424 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
425 m->mothurOutEndLine();
430 catch(exception& e) {
431 m->errorOut(e, "UnifracWeightedCommand", "execute");
435 /**************************************************************************************************/
436 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
438 //we need to find the average distance and standard deviation for each groups distance
441 vector<double> averages; averages.resize(numComp, 0);
442 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
443 for (int i = 0; i < dists[thisIter].size(); i++) {
444 averages[i] += dists[thisIter][i];
449 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
451 //find standard deviation
452 vector<double> stdDev; stdDev.resize(numComp, 0);
454 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
455 for (int j = 0; j < dists[thisIter].size(); j++) {
456 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
459 for (int i = 0; i < stdDev.size(); i++) {
460 stdDev[i] /= (float) subsampleIters;
461 stdDev[i] = sqrt(stdDev[i]);
464 //make matrix with scores in it
465 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
466 for (int i = 0; i < m->getNumGroups(); i++) {
467 avedists[i].resize(m->getNumGroups(), 0.0);
470 //make matrix with scores in it
471 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
472 for (int i = 0; i < m->getNumGroups(); i++) {
473 stddists[i].resize(m->getNumGroups(), 0.0);
476 //flip it so you can print it
478 for (int r=0; r<m->getNumGroups(); r++) {
479 for (int l = 0; l < r; l++) {
480 avedists[r][l] = averages[count];
481 avedists[l][r] = averages[count];
482 stddists[r][l] = stdDev[count];
483 stddists[l][r] = stdDev[count];
488 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
489 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
490 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
492 m->openOutputFile(aveFileName, out);
494 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
495 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
496 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
498 m->openOutputFile(stdFileName, outStd);
500 if ((outputForm == "lt") || (outputForm == "square")) {
502 out << m->getNumGroups() << endl;
503 outStd << m->getNumGroups() << endl;
507 for (int r=0; r<m->getNumGroups(); r++) {
509 string name = (m->getGroups())[r];
510 if (name.length() < 10) { //pad with spaces to make compatible
511 while (name.length() < 10) { name += " "; }
514 if (outputForm == "lt") {
516 outStd << name << '\t';
519 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
520 out << endl; outStd << endl;
521 }else if (outputForm == "square") {
523 outStd << name << '\t';
526 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
527 out << endl; outStd << endl;
530 for (int l = 0; l < r; l++) {
531 string otherName = (m->getGroups())[l];
532 if (otherName.length() < 10) { //pad with spaces to make compatible
533 while (otherName.length() < 10) { otherName += " "; }
536 out << name << '\t' << otherName << avedists[r][l] << endl;
537 outStd << name << '\t' << otherName << stddists[r][l] << endl;
546 catch(exception& e) {
547 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
552 /**************************************************************************************************/
553 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
556 //used in tree constructor
559 //create treemap class from groupmap for tree class to use
561 newTmap.makeSim(m->getGroups());
563 //clear old tree names if any
564 m->Treenames.clear();
566 //fills globaldatas tree names
567 m->Treenames = m->getGroups();
569 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
571 if (m->control_pressed) { return 0; }
574 Tree* conTree = con.getTree(newTrees);
576 //create a new filename
577 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");
578 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
580 m->openOutputFile(conFile, outTree);
582 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
588 catch(exception& e) {
589 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
593 /**************************************************************************************************/
595 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
600 //create a new filename
601 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");
602 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
605 m->openOutputFile(outputFile, outAll);
608 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
610 if (m->control_pressed) { break; }
612 //make matrix with scores in it
613 vector< vector<double> > sims; sims.resize(m->getNumGroups());
614 for (int j = 0; j < m->getNumGroups(); j++) {
615 sims[j].resize(m->getNumGroups(), 0.0);
619 for (int r=0; r<m->getNumGroups(); r++) {
620 for (int l = 0; l < r; l++) {
621 double sim = -(dists[i][count]-1.0);
629 Tree* tempTree = new Tree(&mytmap, sims);
630 map<string, string> empty;
631 tempTree->assembleTree(empty);
633 trees.push_back(tempTree);
636 tempTree->print(outAll);
641 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
645 catch(exception& e) {
646 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
650 /**************************************************************************************************/
652 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
655 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
656 vector< vector<string> > namesOfGroupCombos;
657 for (int a=0; a<numGroups; a++) {
658 for (int l = 0; l < a; l++) {
659 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
660 namesOfGroupCombos.push_back(groups);
666 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
668 int numPairs = namesOfGroupCombos.size();
669 int numPairsPerProcessor = numPairs / processors;
671 for (int i = 0; i < processors; i++) {
672 int startPos = i * numPairsPerProcessor;
673 if(i == processors - 1){
674 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
676 lines.push_back(linePair(startPos, numPairsPerProcessor));
682 //get scores for random trees
683 for (int j = 0; j < iters; j++) {
685 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
687 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
689 createProcesses(thisTree, namesOfGroupCombos, rScores);
692 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
695 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
698 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
702 //find the signifigance of the score for summary file
703 for (int f = 0; f < numComp; f++) {
705 sort(rScores[f].begin(), rScores[f].end());
707 //the index of the score higher than yours is returned
708 //so if you have 1000 random trees the index returned is 100
709 //then there are 900 trees with a score greater then you.
710 //giving you a signifigance of 0.900
711 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
713 //the signifigance is the number of trees with the users score or higher
714 WScoreSig.push_back((iters-index)/(float)iters);
717 //out << "Tree# " << i << endl;
718 calculateFreqsCumuls();
725 catch(exception& e) {
726 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
730 /**************************************************************************************************/
732 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
734 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
736 vector<int> processIDS;
740 //loop through and create all the processes you want
741 while (process != processors) {
745 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
748 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
750 //pass numSeqs to parent
752 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
753 m->openOutputFile(tempFile, out);
754 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
759 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
760 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
765 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
767 //force parent to wait until all the processes are done
768 for (int i=0;i<(processors-1);i++) {
769 int temp = processIDS[i];
773 //get data created by processes
774 for (int i=0;i<(processors-1);i++) {
777 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
778 m->openInputFile(s, in);
781 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
789 catch(exception& e) {
790 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
795 /**************************************************************************************************/
796 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
798 Tree* randT = new Tree(tmap);
800 Weighted weighted(includeRoot);
802 for (int h = start; h < (start+num); h++) {
804 if (m->control_pressed) { return 0; }
806 //initialize weighted score
807 string groupA = namesOfGroupCombos[h][0];
808 string groupB = namesOfGroupCombos[h][1];
813 //create a random tree with same topology as T[i], but different labels
814 randT->assembleRandomUnifracTree(groupA, groupB);
816 if (m->control_pressed) { delete randT; return 0; }
818 //get wscore of random tree
819 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
821 if (m->control_pressed) { delete randT; return 0; }
824 scores[h].push_back(randomData[0]);
832 catch(exception& e) {
833 m->errorOut(e, "UnifracWeightedCommand", "driver");
837 /***********************************************************/
838 void UnifracWeightedCommand::printWeightedFile() {
842 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
844 for(int a = 0; a < numComp; a++) {
845 output->initFile(groupComb[a], tags);
847 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
848 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
849 output->output(data);
855 catch(exception& e) {
856 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
862 /***********************************************************/
863 void UnifracWeightedCommand::printWSummaryFile() {
866 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
867 m->mothurOut("Tree#\tGroups\tWScore\t");
868 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
869 outSum << endl; m->mothurOutEndLine();
872 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
876 for (int i = 0; i < T.size(); i++) {
877 for (int j = 0; j < numComp; j++) {
879 if (WScoreSig[count] > (1/(float)iters)) {
880 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
881 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
882 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
884 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
885 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
886 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
889 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
890 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
891 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
898 catch(exception& e) {
899 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
903 /***********************************************************/
904 void UnifracWeightedCommand::createPhylipFile() {
908 for (int i = 0; i < T.size(); i++) {
910 string phylipFileName;
911 if ((outputForm == "lt") || (outputForm == "square")) {
912 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
913 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
915 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
916 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
920 m->openOutputFile(phylipFileName, out);
922 if ((outputForm == "lt") || (outputForm == "square")) {
924 out << m->getNumGroups() << endl;
927 //make matrix with scores in it
928 vector< vector<float> > dists; dists.resize(m->getNumGroups());
929 for (int i = 0; i < m->getNumGroups(); i++) {
930 dists[i].resize(m->getNumGroups(), 0.0);
933 //flip it so you can print it
934 for (int r=0; r<m->getNumGroups(); r++) {
935 for (int l = 0; l < r; l++) {
936 dists[r][l] = utreeScores[count];
937 dists[l][r] = utreeScores[count];
943 for (int r=0; r<m->getNumGroups(); r++) {
945 string name = (m->getGroups())[r];
946 if (name.length() < 10) { //pad with spaces to make compatible
947 while (name.length() < 10) { name += " "; }
950 if (outputForm == "lt") {
954 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
956 }else if (outputForm == "square") {
960 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
964 for (int l = 0; l < r; l++) {
965 string otherName = (m->getGroups())[l];
966 if (otherName.length() < 10) { //pad with spaces to make compatible
967 while (otherName.length() < 10) { otherName += " "; }
970 out << name << '\t' << otherName << dists[r][l] << endl;
977 catch(exception& e) {
978 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
982 /***********************************************************/
983 int UnifracWeightedCommand::findIndex(float score, int index) {
985 for (int i = 0; i < rScores[index].size(); i++) {
986 if (rScores[index][i] >= score) { return i; }
988 return rScores[index].size();
990 catch(exception& e) {
991 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
996 /***********************************************************/
998 void UnifracWeightedCommand::calculateFreqsCumuls() {
1000 //clear out old tree values
1002 rScoreFreq.resize(numComp);
1004 rCumul.resize(numComp);
1005 validScores.clear();
1007 //calculate frequency
1008 for (int f = 0; f < numComp; f++) {
1009 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1010 validScores[rScores[f][i]] = rScores[f][i];
1011 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1012 if (it != rScoreFreq[f].end()) {
1013 rScoreFreq[f][rScores[f][i]]++;
1015 rScoreFreq[f][rScores[f][i]] = 1;
1021 for(int a = 0; a < numComp; a++) {
1022 float rcumul = 1.0000;
1023 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1024 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1025 //make rscoreFreq map and rCumul
1026 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1027 rCumul[a][it->first] = rcumul;
1028 //get percentage of random trees with that info
1029 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1030 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1035 catch(exception& e) {
1036 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1040 /***********************************************************/