2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
19 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
25 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pconsensus);
26 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
27 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "",false,false); parameters.push_back(pdistance);
28 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
41 //**********************************************************************************************************************
42 string UnifracWeightedCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
46 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
47 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
48 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
49 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
50 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
51 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
52 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
53 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
54 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
55 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
56 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
57 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
58 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
62 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
66 //**********************************************************************************************************************
67 string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){
69 string outputFileName = "";
70 map<string, vector<string> >::iterator it;
72 //is this a type this command creates
73 it = outputTypes.find(type);
74 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
76 if (type == "weighted") { outputFileName = "weighted"; }
77 else if (type == "wsummary") { outputFileName = "wsummary"; }
78 else if (type == "phylip") { outputFileName = "dist"; }
79 else if (type == "column") { outputFileName = "dist"; }
80 else if (type == "tree") { outputFileName = "tre"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
83 return outputFileName;
86 m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag");
90 //**********************************************************************************************************************
91 UnifracWeightedCommand::UnifracWeightedCommand(){
93 abort = true; calledHelp = true;
95 vector<string> tempOutNames;
96 outputTypes["weighted"] = tempOutNames;
97 outputTypes["wsummary"] = tempOutNames;
98 outputTypes["phylip"] = tempOutNames;
99 outputTypes["column"] = tempOutNames;
100 outputTypes["tree"] = tempOutNames;
102 catch(exception& e) {
103 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
108 /***********************************************************/
109 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
111 abort = false; calledHelp = false;
113 //allow user to run help
114 if(option == "help") { help(); abort = true; calledHelp = true; }
115 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
118 vector<string> myArray = setParameters();
120 OptionParser parser(option);
121 map<string,string> parameters=parser.getParameters();
122 map<string,string>::iterator it;
124 ValidParameters validParameter;
126 //check to make sure all parameters are valid for command
127 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
128 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
131 //initialize outputTypes
132 vector<string> tempOutNames;
133 outputTypes["weighted"] = tempOutNames;
134 outputTypes["wsummary"] = tempOutNames;
135 outputTypes["phylip"] = tempOutNames;
136 outputTypes["column"] = tempOutNames;
137 outputTypes["tree"] = tempOutNames;
139 //if the user changes the input directory command factory will send this info to us in the output parameter
140 string inputDir = validParameter.validFile(parameters, "inputdir", false);
141 if (inputDir == "not found"){ inputDir = ""; }
144 it = parameters.find("tree");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["tree"] = inputDir + it->second; }
152 it = parameters.find("group");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["group"] = inputDir + it->second; }
160 it = parameters.find("name");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["name"] = inputDir + it->second; }
169 //check for required parameters
170 treefile = validParameter.validFile(parameters, "tree", true);
171 if (treefile == "not open") { treefile = ""; abort = true; }
172 else if (treefile == "not found") { //if there is a current design file, use it
173 treefile = m->getTreeFile();
174 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
175 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
176 }else { m->setTreeFile(treefile); }
178 //check for required parameters
179 groupfile = validParameter.validFile(parameters, "group", true);
180 if (groupfile == "not open") { abort = true; }
181 else if (groupfile == "not found") { groupfile = ""; }
182 else { m->setGroupFile(groupfile); }
184 namefile = validParameter.validFile(parameters, "name", true);
185 if (namefile == "not open") { namefile = ""; abort = true; }
186 else if (namefile == "not found") { namefile = ""; }
187 else { m->setNameFile(namefile); }
189 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
192 //check for optional parameter and set defaults
193 // ...at some point should added some additional type checking...
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 m->setGroups(Groups);
201 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
202 m->mothurConvert(itersString, iters);
204 string temp = validParameter.validFile(parameters, "distance", false);
205 if (temp == "not found") { phylip = false; outputForm = ""; }
207 if (temp=="phylip") { temp = "lt"; }
208 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
209 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
212 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
213 random = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
216 includeRoot = m->isTrue(temp);
218 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
219 m->setProcessors(temp);
220 m->mothurConvert(temp, processors);
222 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
223 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
225 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
226 else { subsample = false; }
229 if (!subsample) { subsampleIters = 0; }
230 else { subsampleIters = iters; }
232 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
233 consensus = m->isTrue(temp);
235 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
236 if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
237 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
238 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
240 if (namefile == "") {
241 vector<string> files; files.push_back(treefile);
242 parser.getNameFile(files);
248 catch(exception& e) {
249 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
253 /***********************************************************/
254 int UnifracWeightedCommand::execute() {
257 if (abort == true) { if (calledHelp) { return 0; } return 2; }
259 m->setTreeFile(treefile);
261 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
262 T = reader->getTrees();
263 tmap = T[0]->getTreeMap();
264 map<string, string> nameMap = reader->getNames();
265 map<string, string> unique2Dup = reader->getNameMap();
268 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
270 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
271 m->openOutputFile(sumFile, outSum);
272 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
275 string s; //to make work with setgroups
276 Groups = m->getGroups();
277 vector<string> nameGroups = tmap->getNamesOfGroups();
278 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
279 m->setGroups(Groups);
281 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
283 Weighted weighted(includeRoot);
285 int start = time(NULL);
289 //user has not set size, set size = smallest samples size
290 if (subsampleSize == -1) {
291 vector<string> temp; temp.push_back(Groups[0]);
292 subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
293 for (int i = 1; i < Groups.size(); i++) {
294 temp.clear(); temp.push_back(Groups[i]);
295 int thisSize = (tmap->getNamesSeqs(temp)).size();
296 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
298 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
299 }else { //eliminate any too small groups
300 vector<string> newGroups = Groups;
302 for (int i = 0; i < newGroups.size(); i++) {
303 vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
304 vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
305 int thisSize = thisGroupsSeqs.size();
307 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
308 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
310 m->setGroups(Groups);
314 //here in case some groups are removed by subsample
315 util.getCombos(groupComb, Groups, numComp);
317 if (numComp < processors) { processors = numComp; }
319 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
321 //get weighted scores for users trees
322 for (int i = 0; i < T.size(); i++) {
324 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
327 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
328 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
330 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
331 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
334 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString);
335 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
336 outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"));
339 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
340 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
343 for (int s=0; s<numComp; s++) {
344 //add users score to vector of user scores
345 uScores[s].push_back(userData[s]);
346 //save users tree score for summary file
347 utreeScores.push_back(userData[s]);
350 if (random) { runRandomCalcs(T[i], userData); }
358 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
359 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
361 if (m->control_pressed) { break; }
363 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
364 TreeMap* newTmap = new TreeMap();
365 //newTmap->getCopy(*tmap);
368 //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
370 //uses method of setting groups to doNotIncludeMe
372 Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
374 //call new weighted function
375 vector<double> iterData; iterData.resize(numComp,0);
376 Weighted thisWeighted(includeRoot);
377 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
379 //save data to make ave dist, std dist
380 calcDistsTotals.push_back(iterData);
383 delete subSampleTree;
385 if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
388 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
390 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
391 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
395 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
397 if (phylip) { createPhylipFile(); }
401 //clear out users groups
404 for (int i = 0; i < T.size(); i++) { delete T[i]; }
406 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
408 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
410 //set phylip file as new current phylipfile
412 itTypes = outputTypes.find("phylip");
413 if (itTypes != outputTypes.end()) {
414 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
417 //set column file as new current columnfile
418 itTypes = outputTypes.find("column");
419 if (itTypes != outputTypes.end()) {
420 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
423 m->mothurOutEndLine();
424 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
425 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
426 m->mothurOutEndLine();
431 catch(exception& e) {
432 m->errorOut(e, "UnifracWeightedCommand", "execute");
436 /**************************************************************************************************/
437 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
439 //we need to find the average distance and standard deviation for each groups distance
442 vector<double> averages; averages.resize(numComp, 0);
443 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
444 for (int i = 0; i < dists[thisIter].size(); i++) {
445 averages[i] += dists[thisIter][i];
450 for (int i = 0; i < averages.size(); i++) { averages[i] /= (float) subsampleIters; }
452 //find standard deviation
453 vector<double> stdDev; stdDev.resize(numComp, 0);
455 for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
456 for (int j = 0; j < dists[thisIter].size(); j++) {
457 stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
460 for (int i = 0; i < stdDev.size(); i++) {
461 stdDev[i] /= (float) subsampleIters;
462 stdDev[i] = sqrt(stdDev[i]);
465 //make matrix with scores in it
466 vector< vector<double> > avedists; avedists.resize(m->getNumGroups());
467 for (int i = 0; i < m->getNumGroups(); i++) {
468 avedists[i].resize(m->getNumGroups(), 0.0);
471 //make matrix with scores in it
472 vector< vector<double> > stddists; stddists.resize(m->getNumGroups());
473 for (int i = 0; i < m->getNumGroups(); i++) {
474 stddists[i].resize(m->getNumGroups(), 0.0);
477 //flip it so you can print it
479 for (int r=0; r<m->getNumGroups(); r++) {
480 for (int l = 0; l < r; l++) {
481 avedists[r][l] = averages[count];
482 avedists[l][r] = averages[count];
483 stddists[r][l] = stdDev[count];
484 stddists[l][r] = stdDev[count];
489 string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip");
490 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
491 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
493 m->openOutputFile(aveFileName, out);
495 string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip");
496 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
497 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
499 m->openOutputFile(stdFileName, outStd);
501 if ((outputForm == "lt") || (outputForm == "square")) {
503 out << m->getNumGroups() << endl;
504 outStd << m->getNumGroups() << endl;
508 for (int r=0; r<m->getNumGroups(); r++) {
510 string name = (m->getGroups())[r];
511 if (name.length() < 10) { //pad with spaces to make compatible
512 while (name.length() < 10) { name += " "; }
515 if (outputForm == "lt") {
517 outStd << name << '\t';
520 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
521 out << endl; outStd << endl;
522 }else if (outputForm == "square") {
524 outStd << name << '\t';
527 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
528 out << endl; outStd << endl;
531 for (int l = 0; l < r; l++) {
532 string otherName = (m->getGroups())[l];
533 if (otherName.length() < 10) { //pad with spaces to make compatible
534 while (otherName.length() < 10) { otherName += " "; }
537 out << name << '\t' << otherName << avedists[r][l] << endl;
538 outStd << name << '\t' << otherName << stddists[r][l] << endl;
547 catch(exception& e) {
548 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
553 /**************************************************************************************************/
554 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
557 //used in tree constructor
560 //create treemap class from groupmap for tree class to use
562 newTmap.makeSim(m->getGroups());
564 //clear old tree names if any
565 m->Treenames.clear();
567 //fills globaldatas tree names
568 m->Treenames = m->getGroups();
570 vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
572 if (m->control_pressed) { return 0; }
575 Tree* conTree = con.getTree(newTrees);
577 //create a new filename
578 string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree");
579 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
581 m->openOutputFile(conFile, outTree);
583 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
589 catch(exception& e) {
590 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
594 /**************************************************************************************************/
596 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
601 //create a new filename
602 string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree");
603 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
606 m->openOutputFile(outputFile, outAll);
609 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
611 if (m->control_pressed) { break; }
613 //make matrix with scores in it
614 vector< vector<double> > sims; sims.resize(m->getNumGroups());
615 for (int j = 0; j < m->getNumGroups(); j++) {
616 sims[j].resize(m->getNumGroups(), 0.0);
620 for (int r=0; r<m->getNumGroups(); r++) {
621 for (int l = 0; l < r; l++) {
622 double sim = -(dists[i][count]-1.0);
630 Tree* tempTree = new Tree(&mytmap, sims);
631 map<string, string> empty;
632 tempTree->assembleTree(empty);
634 trees.push_back(tempTree);
637 tempTree->print(outAll);
642 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
646 catch(exception& e) {
647 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
651 /**************************************************************************************************/
653 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
656 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
657 vector< vector<string> > namesOfGroupCombos;
658 for (int a=0; a<numGroups; a++) {
659 for (int l = 0; l < a; l++) {
660 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
661 namesOfGroupCombos.push_back(groups);
667 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
669 int numPairs = namesOfGroupCombos.size();
670 int numPairsPerProcessor = numPairs / processors;
672 for (int i = 0; i < processors; i++) {
673 int startPos = i * numPairsPerProcessor;
674 if(i == processors - 1){
675 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
677 lines.push_back(linePair(startPos, numPairsPerProcessor));
683 //get scores for random trees
684 for (int j = 0; j < iters; j++) {
686 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
688 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
690 createProcesses(thisTree, namesOfGroupCombos, rScores);
693 driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
696 if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
699 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
703 //find the signifigance of the score for summary file
704 for (int f = 0; f < numComp; f++) {
706 sort(rScores[f].begin(), rScores[f].end());
708 //the index of the score higher than yours is returned
709 //so if you have 1000 random trees the index returned is 100
710 //then there are 900 trees with a score greater then you.
711 //giving you a signifigance of 0.900
712 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
714 //the signifigance is the number of trees with the users score or higher
715 WScoreSig.push_back((iters-index)/(float)iters);
718 //out << "Tree# " << i << endl;
719 calculateFreqsCumuls();
726 catch(exception& e) {
727 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
731 /**************************************************************************************************/
733 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
735 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
737 vector<int> processIDS;
741 //loop through and create all the processes you want
742 while (process != processors) {
746 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
749 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
751 //pass numSeqs to parent
753 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
754 m->openOutputFile(tempFile, out);
755 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
760 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
761 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
766 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
768 //force parent to wait until all the processes are done
769 for (int i=0;i<(processors-1);i++) {
770 int temp = processIDS[i];
774 //get data created by processes
775 for (int i=0;i<(processors-1);i++) {
778 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
779 m->openInputFile(s, in);
782 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
790 catch(exception& e) {
791 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
796 /**************************************************************************************************/
797 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
799 Tree* randT = new Tree(tmap);
801 Weighted weighted(includeRoot);
803 for (int h = start; h < (start+num); h++) {
805 if (m->control_pressed) { return 0; }
807 //initialize weighted score
808 string groupA = namesOfGroupCombos[h][0];
809 string groupB = namesOfGroupCombos[h][1];
814 //create a random tree with same topology as T[i], but different labels
815 randT->assembleRandomUnifracTree(groupA, groupB);
817 if (m->control_pressed) { delete randT; return 0; }
819 //get wscore of random tree
820 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
822 if (m->control_pressed) { delete randT; return 0; }
825 scores[h].push_back(randomData[0]);
833 catch(exception& e) {
834 m->errorOut(e, "UnifracWeightedCommand", "driver");
838 /***********************************************************/
839 void UnifracWeightedCommand::printWeightedFile() {
843 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
845 for(int a = 0; a < numComp; a++) {
846 output->initFile(groupComb[a], tags);
848 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
849 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
850 output->output(data);
856 catch(exception& e) {
857 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
863 /***********************************************************/
864 void UnifracWeightedCommand::printWSummaryFile() {
867 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
868 m->mothurOut("Tree#\tGroups\tWScore\t");
869 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
870 outSum << endl; m->mothurOutEndLine();
873 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
877 for (int i = 0; i < T.size(); i++) {
878 for (int j = 0; j < numComp; j++) {
880 if (WScoreSig[count] > (1/(float)iters)) {
881 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
882 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
883 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
885 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
886 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
887 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
890 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
891 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
892 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
899 catch(exception& e) {
900 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
904 /***********************************************************/
905 void UnifracWeightedCommand::createPhylipFile() {
909 for (int i = 0; i < T.size(); i++) {
911 string phylipFileName;
912 if ((outputForm == "lt") || (outputForm == "square")) {
913 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip");
914 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
916 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column");
917 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
921 m->openOutputFile(phylipFileName, out);
923 if ((outputForm == "lt") || (outputForm == "square")) {
925 out << m->getNumGroups() << endl;
928 //make matrix with scores in it
929 vector< vector<float> > dists; dists.resize(m->getNumGroups());
930 for (int i = 0; i < m->getNumGroups(); i++) {
931 dists[i].resize(m->getNumGroups(), 0.0);
934 //flip it so you can print it
935 for (int r=0; r<m->getNumGroups(); r++) {
936 for (int l = 0; l < r; l++) {
937 dists[r][l] = utreeScores[count];
938 dists[l][r] = utreeScores[count];
944 for (int r=0; r<m->getNumGroups(); r++) {
946 string name = (m->getGroups())[r];
947 if (name.length() < 10) { //pad with spaces to make compatible
948 while (name.length() < 10) { name += " "; }
951 if (outputForm == "lt") {
955 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
957 }else if (outputForm == "square") {
961 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
965 for (int l = 0; l < r; l++) {
966 string otherName = (m->getGroups())[l];
967 if (otherName.length() < 10) { //pad with spaces to make compatible
968 while (otherName.length() < 10) { otherName += " "; }
971 out << name << '\t' << otherName << dists[r][l] << endl;
978 catch(exception& e) {
979 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
983 /***********************************************************/
984 int UnifracWeightedCommand::findIndex(float score, int index) {
986 for (int i = 0; i < rScores[index].size(); i++) {
987 if (rScores[index][i] >= score) { return i; }
989 return rScores[index].size();
991 catch(exception& e) {
992 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
997 /***********************************************************/
999 void UnifracWeightedCommand::calculateFreqsCumuls() {
1001 //clear out old tree values
1003 rScoreFreq.resize(numComp);
1005 rCumul.resize(numComp);
1006 validScores.clear();
1008 //calculate frequency
1009 for (int f = 0; f < numComp; f++) {
1010 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1011 validScores[rScores[f][i]] = rScores[f][i];
1012 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1013 if (it != rScoreFreq[f].end()) {
1014 rScoreFreq[f][rScores[f][i]]++;
1016 rScoreFreq[f][rScores[f][i]] = 1;
1022 for(int a = 0; a < numComp; a++) {
1023 float rcumul = 1.0000;
1024 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1025 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1026 //make rscoreFreq map and rCumul
1027 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1028 rCumul[a][it->first] = rcumul;
1029 //get percentage of random trees with that info
1030 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1031 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1036 catch(exception& e) {
1037 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1041 /***********************************************************/