2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
26 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
27 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
28 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputPattern(string type) {
71 if (type == "weighted") { pattern = "[filename],weighted-[filename],[tag],weighted"; }
72 else if (type == "wsummary") { pattern = "[filename],wsummary"; }
73 else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
74 else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
75 else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 UnifracWeightedCommand::UnifracWeightedCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["weighted"] = tempOutNames;
92 outputTypes["wsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
95 outputTypes["tree"] = tempOutNames;
98 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
103 /***********************************************************/
104 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters=parser.getParameters();
117 map<string,string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["weighted"] = tempOutNames;
129 outputTypes["wsummary"] = tempOutNames;
130 outputTypes["phylip"] = tempOutNames;
131 outputTypes["column"] = tempOutNames;
132 outputTypes["tree"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("tree");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["tree"] = inputDir + it->second; }
147 it = parameters.find("group");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["group"] = inputDir + it->second; }
155 it = parameters.find("name");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["name"] = inputDir + it->second; }
163 it = parameters.find("count");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["count"] = inputDir + it->second; }
172 //check for required parameters
173 treefile = validParameter.validFile(parameters, "tree", true);
174 if (treefile == "not open") { treefile = ""; abort = true; }
175 else if (treefile == "not found") { //if there is a current design file, use it
176 treefile = m->getTreeFile();
177 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else { m->setTreeFile(treefile); }
181 //check for required parameters
182 groupfile = validParameter.validFile(parameters, "group", true);
183 if (groupfile == "not open") { abort = true; }
184 else if (groupfile == "not found") { groupfile = ""; }
185 else { m->setGroupFile(groupfile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { namefile = ""; abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 countfile = validParameter.validFile(parameters, "count", true);
193 if (countfile == "not open") { countfile = ""; abort = true; }
194 else if (countfile == "not found") { countfile = ""; }
195 else { m->setCountTableFile(countfile); }
197 if ((namefile != "") && (countfile != "")) {
198 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
201 if ((groupfile != "") && (countfile != "")) {
202 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
205 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
210 groups = validParameter.validFile(parameters, "groups", false);
211 if (groups == "not found") { groups = ""; }
213 m->splitAtDash(groups, Groups);
214 m->setGroups(Groups);
217 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
218 m->mothurConvert(itersString, iters);
220 string temp = validParameter.validFile(parameters, "distance", false);
221 if (temp == "not found") { phylip = false; outputForm = ""; }
223 if (temp=="phylip") { temp = "lt"; }
224 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
225 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
228 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
229 random = m->isTrue(temp);
231 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
232 includeRoot = m->isTrue(temp);
234 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
235 m->setProcessors(temp);
236 m->mothurConvert(temp, processors);
238 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
239 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
241 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
242 else { subsample = false; }
245 if (!subsample) { subsampleIters = 0; }
246 else { subsampleIters = iters; }
248 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
249 consensus = m->isTrue(temp);
251 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
252 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
255 if ((!testCt.testGroups(countfile)) && (subsample)) {
256 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
259 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
260 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
263 if (namefile == "") {
264 vector<string> files; files.push_back(treefile);
265 parser.getNameFile(files);
272 catch(exception& e) {
273 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
277 /***********************************************************/
278 int UnifracWeightedCommand::execute() {
281 if (abort == true) { if (calledHelp) { return 0; } return 2; }
283 m->setTreeFile(treefile);
286 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
287 else { reader = new TreeReader(treefile, countfile); }
288 T = reader->getTrees();
289 ct = T[0]->getCountTable();
292 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
294 map<string, string> variables;
295 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
296 sumFile = getOutputFileName("wsummary",variables);
297 m->openOutputFile(sumFile, outSum);
298 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
301 string s; //to make work with setgroups
302 Groups = m->getGroups();
303 vector<string> nameGroups = ct->getNamesOfGroups();
304 if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
305 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
306 m->setGroups(Groups);
308 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
310 Weighted weighted(includeRoot);
312 int start = time(NULL);
316 //user has not set size, set size = smallest samples size
317 if (subsampleSize == -1) {
318 vector<string> temp; temp.push_back(Groups[0]);
319 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
320 for (int i = 1; i < Groups.size(); i++) {
321 int thisSize = ct->getGroupCount(Groups[i]);
322 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
324 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
325 }else { //eliminate any too small groups
326 vector<string> newGroups = Groups;
328 for (int i = 0; i < newGroups.size(); i++) {
329 int thisSize = ct->getGroupCount(newGroups[i]);
331 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
332 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
334 m->setGroups(Groups);
338 //here in case some groups are removed by subsample
339 util.getCombos(groupComb, Groups, numComp);
341 if (numComp < processors) { processors = numComp; }
343 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
345 //get weighted scores for users trees
346 for (int i = 0; i < T.size(); i++) {
348 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
351 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
352 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
354 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
355 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
358 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
359 variables["[tag]"] = toString(i+1);
360 string wFileName = getOutputFileName("weighted", variables);
361 output = new ColumnFile(wFileName, itersString);
362 outputNames.push_back(wFileName); outputTypes["weighted"].push_back(wFileName);
365 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
366 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
369 for (int s=0; s<numComp; s++) {
370 //add users score to vector of user scores
371 uScores[s].push_back(userData[s]);
372 //save users tree score for summary file
373 utreeScores.push_back(userData[s]);
376 if (random) { runRandomCalcs(T[i], userData); }
384 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
385 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
386 if (m->control_pressed) { break; }
388 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
389 CountTable* newCt = new CountTable();
392 if (m->debug) { sampleTime = time(NULL); }
394 //uses method of setting groups to doNotIncludeMe
396 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
398 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
400 //call new weighted function
401 vector<double> iterData; iterData.resize(numComp,0);
402 Weighted thisWeighted(includeRoot);
403 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
405 //save data to make ave dist, std dist
406 calcDistsTotals.push_back(iterData);
409 delete subSampleTree;
411 if((thisIter+1) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); }
414 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
416 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
417 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
421 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
423 if (phylip) { createPhylipFile(); }
427 //clear out users groups
430 for (int i = 0; i < T.size(); i++) { delete T[i]; }
432 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
434 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
436 //set phylip file as new current phylipfile
438 itTypes = outputTypes.find("phylip");
439 if (itTypes != outputTypes.end()) {
440 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
443 //set column file as new current columnfile
444 itTypes = outputTypes.find("column");
445 if (itTypes != outputTypes.end()) {
446 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
449 m->mothurOutEndLine();
450 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
451 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
452 m->mothurOutEndLine();
457 catch(exception& e) {
458 m->errorOut(e, "UnifracWeightedCommand", "execute");
462 /**************************************************************************************************/
463 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
465 //we need to find the average distance and standard deviation for each groups distance
468 vector<double> averages = m->getAverages(dists);
470 //find standard deviation
471 vector<double> stdDev = m->getStandardDeviation(dists, averages);
473 //make matrix with scores in it
474 vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
475 for (int i = 0; i < m->getNumGroups(); i++) {
477 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
478 avedists.push_back(temp);
481 //make matrix with scores in it
482 vector< vector<double> > stddists; //stddists.resize(m->getNumGroups());
483 for (int i = 0; i < m->getNumGroups(); i++) {
485 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
486 //stddists[i].resize(m->getNumGroups(), 0.0);
487 stddists.push_back(temp);
491 //flip it so you can print it
493 for (int r=0; r<m->getNumGroups(); r++) {
494 for (int l = 0; l < r; l++) {
495 avedists[r][l] = averages[count];
496 avedists[l][r] = averages[count];
497 stddists[r][l] = stdDev[count];
498 stddists[l][r] = stdDev[count];
503 map<string, string> variables;
504 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
505 variables["[tag]"] = toString(treeNum+1);
506 variables["[tag2]"] = "weighted.ave";
507 string aveFileName = getOutputFileName("phylip",variables);
508 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
509 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
511 m->openOutputFile(aveFileName, out);
513 variables["[tag2]"] = "weighted.std";
514 string stdFileName = getOutputFileName("phylip",variables);
515 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
516 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
518 m->openOutputFile(stdFileName, outStd);
520 if ((outputForm == "lt") || (outputForm == "square")) {
522 out << m->getNumGroups() << endl;
523 outStd << m->getNumGroups() << endl;
527 for (int r=0; r<m->getNumGroups(); r++) {
529 string name = (m->getGroups())[r];
530 if (name.length() < 10) { //pad with spaces to make compatible
531 while (name.length() < 10) { name += " "; }
534 if (outputForm == "lt") {
536 outStd << name << '\t';
539 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
540 out << endl; outStd << endl;
541 }else if (outputForm == "square") {
543 outStd << name << '\t';
546 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
547 out << endl; outStd << endl;
550 for (int l = 0; l < r; l++) {
551 string otherName = (m->getGroups())[l];
552 if (otherName.length() < 10) { //pad with spaces to make compatible
553 while (otherName.length() < 10) { otherName += " "; }
556 out << name << '\t' << otherName << avedists[r][l] << endl;
557 outStd << name << '\t' << otherName << stddists[r][l] << endl;
566 catch(exception& e) {
567 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
572 /**************************************************************************************************/
573 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
576 //used in tree constructor
579 ///create treemap class from groupmap for tree class to use
582 map<string, string> groupMap;
584 for (int i = 0; i < m->getGroups().size(); i++) {
585 nameMap.insert(m->getGroups()[i]);
586 gps.insert(m->getGroups()[i]);
587 groupMap[m->getGroups()[i]] = m->getGroups()[i];
589 newCt.createTable(nameMap, groupMap, gps);
591 //clear old tree names if any
592 m->Treenames.clear();
594 //fills globaldatas tree names
595 m->Treenames = m->getGroups();
597 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
599 if (m->control_pressed) { return 0; }
602 Tree* conTree = con.getTree(newTrees);
604 //create a new filename
605 map<string, string> variables;
606 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
607 variables["[tag]"] = toString(treeNum+1);
608 variables["[tag2]"] = "weighted.cons";
609 string conFile = getOutputFileName("tree",variables);
610 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
612 m->openOutputFile(conFile, outTree);
614 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
620 catch(exception& e) {
621 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
625 /**************************************************************************************************/
627 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
632 //create a new filename
633 map<string, string> variables;
634 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
635 variables["[tag]"] = toString(treeNum+1);
636 variables["[tag2]"] = "weighted.all";
637 string outputFile = getOutputFileName("tree",variables);
638 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
641 m->openOutputFile(outputFile, outAll);
644 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
646 if (m->control_pressed) { break; }
648 //make matrix with scores in it
649 vector< vector<double> > sims; sims.resize(m->getNumGroups());
650 for (int j = 0; j < m->getNumGroups(); j++) {
651 sims[j].resize(m->getNumGroups(), 0.0);
655 for (int r=0; r<m->getNumGroups(); r++) {
656 for (int l = 0; l < r; l++) {
657 double sim = -(dists[i][count]-1.0);
665 Tree* tempTree = new Tree(&myct, sims);
666 tempTree->assembleTree();
668 trees.push_back(tempTree);
671 tempTree->print(outAll);
676 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
680 catch(exception& e) {
681 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
685 /**************************************************************************************************/
687 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
690 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
691 vector< vector<string> > namesOfGroupCombos;
692 for (int a=0; a<numGroups; a++) {
693 for (int l = 0; l < a; l++) {
694 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
695 namesOfGroupCombos.push_back(groups);
701 //breakdown work between processors
702 int remainingPairs = namesOfGroupCombos.size();
704 for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
705 int numPairs = remainingPairs; //case for last processor
706 if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
707 lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
708 startIndex = startIndex + numPairs;
709 remainingPairs = remainingPairs - numPairs;
714 //get scores for random trees
715 for (int j = 0; j < iters; j++) {
716 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
717 //if(processors == 1){
718 // driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
720 createProcesses(thisTree, namesOfGroupCombos, rScores);
723 //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
727 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
732 //find the signifigance of the score for summary file
733 for (int f = 0; f < numComp; f++) {
735 sort(rScores[f].begin(), rScores[f].end());
737 //the index of the score higher than yours is returned
738 //so if you have 1000 random trees the index returned is 100
739 //then there are 900 trees with a score greater then you.
740 //giving you a signifigance of 0.900
741 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
743 //the signifigance is the number of trees with the users score or higher
744 WScoreSig.push_back((iters-index)/(float)iters);
747 //out << "Tree# " << i << endl;
748 calculateFreqsCumuls();
755 catch(exception& e) {
756 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
760 /**************************************************************************************************/
762 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
765 vector<int> processIDS;
768 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
769 //loop through and create all the processes you want
770 while (process != processors) {
774 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
777 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
779 //pass numSeqs to parent
781 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
782 m->openOutputFile(tempFile, out);
783 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
788 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
789 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
794 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
796 //force parent to wait until all the processes are done
797 for (int i=0;i<(processors-1);i++) {
798 int temp = processIDS[i];
802 //get data created by processes
803 for (int i=0;i<(processors-1);i++) {
806 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
807 m->openInputFile(s, in);
810 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
816 vector<weightedRandomData*> pDataArray;
817 DWORD dwThreadIdArray[processors-1];
818 HANDLE hThreadArray[processors-1];
819 vector<CountTable*> cts;
822 //Create processor worker threads.
823 for( int i=1; i<processors; i++ ){
824 CountTable* copyCount = new CountTable();
826 Tree* copyTree = new Tree(copyCount);
827 copyTree->getCopy(t);
829 cts.push_back(copyCount);
830 trees.push_back(copyTree);
832 vector< vector<double> > copyScores = rScores;
834 weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores);
835 pDataArray.push_back(tempweighted);
836 processIDS.push_back(i);
838 hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
841 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
843 //Wait until all threads have terminated.
844 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
846 //Close all thread handles and free memory allocations.
847 for(int i=0; i < pDataArray.size(); i++){
848 for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) {
849 scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]);
853 CloseHandle(hThreadArray[i]);
854 delete pDataArray[i];
862 catch(exception& e) {
863 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
868 /**************************************************************************************************/
869 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
871 Tree* randT = new Tree(ct);
873 Weighted weighted(includeRoot);
875 for (int h = start; h < (start+num); h++) {
877 if (m->control_pressed) { return 0; }
879 //initialize weighted score
880 string groupA = namesOfGroupCombos[h][0];
881 string groupB = namesOfGroupCombos[h][1];
886 //create a random tree with same topology as T[i], but different labels
887 randT->assembleRandomUnifracTree(groupA, groupB);
889 if (m->control_pressed) { delete randT; return 0; }
891 //get wscore of random tree
892 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
894 if (m->control_pressed) { delete randT; return 0; }
897 scores[h].push_back(randomData[0]);
905 catch(exception& e) {
906 m->errorOut(e, "UnifracWeightedCommand", "driver");
910 /***********************************************************/
911 void UnifracWeightedCommand::printWeightedFile() {
915 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
917 for(int a = 0; a < numComp; a++) {
918 output->initFile(groupComb[a], tags);
920 for (map<double,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
921 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
922 output->output(data);
928 catch(exception& e) {
929 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
935 /***********************************************************/
936 void UnifracWeightedCommand::printWSummaryFile() {
939 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
940 m->mothurOut("Tree#\tGroups\tWScore\t");
941 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
942 outSum << endl; m->mothurOutEndLine();
945 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
949 for (int i = 0; i < T.size(); i++) {
950 for (int j = 0; j < numComp; j++) {
952 if (WScoreSig[count] > (1/(float)iters)) {
953 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
954 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
955 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
957 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
958 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
959 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
962 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
963 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
964 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
971 catch(exception& e) {
972 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
976 /***********************************************************/
977 void UnifracWeightedCommand::createPhylipFile() {
981 for (int i = 0; i < T.size(); i++) {
983 string phylipFileName;
984 map<string, string> variables;
985 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
986 variables["[tag]"] = toString(i+1);
987 if ((outputForm == "lt") || (outputForm == "square")) {
988 variables["[tag2]"] = "weighted.phylip";
989 phylipFileName = getOutputFileName("phylip",variables);
990 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
992 variables["[tag2]"] = "weighted.column";
993 phylipFileName = getOutputFileName("column",variables);
994 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
999 m->openOutputFile(phylipFileName, out);
1001 if ((outputForm == "lt") || (outputForm == "square")) {
1003 out << m->getNumGroups() << endl;
1006 //make matrix with scores in it
1007 vector< vector<float> > dists; dists.resize(m->getNumGroups());
1008 for (int i = 0; i < m->getNumGroups(); i++) {
1009 dists[i].resize(m->getNumGroups(), 0.0);
1012 //flip it so you can print it
1013 for (int r=0; r<m->getNumGroups(); r++) {
1014 for (int l = 0; l < r; l++) {
1015 dists[r][l] = utreeScores[count];
1016 dists[l][r] = utreeScores[count];
1022 for (int r=0; r<m->getNumGroups(); r++) {
1024 string name = (m->getGroups())[r];
1025 if (name.length() < 10) { //pad with spaces to make compatible
1026 while (name.length() < 10) { name += " "; }
1029 if (outputForm == "lt") {
1030 out << name << '\t';
1033 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
1035 }else if (outputForm == "square") {
1036 out << name << '\t';
1039 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
1043 for (int l = 0; l < r; l++) {
1044 string otherName = (m->getGroups())[l];
1045 if (otherName.length() < 10) { //pad with spaces to make compatible
1046 while (otherName.length() < 10) { otherName += " "; }
1049 out << name << '\t' << otherName << dists[r][l] << endl;
1056 catch(exception& e) {
1057 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1061 /***********************************************************/
1062 int UnifracWeightedCommand::findIndex(float score, int index) {
1064 for (int i = 0; i < rScores[index].size(); i++) {
1065 if (rScores[index][i] >= score) { return i; }
1067 return rScores[index].size();
1069 catch(exception& e) {
1070 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1075 /***********************************************************/
1077 void UnifracWeightedCommand::calculateFreqsCumuls() {
1079 //clear out old tree values
1081 rScoreFreq.resize(numComp);
1083 rCumul.resize(numComp);
1084 validScores.clear();
1086 //calculate frequency
1087 for (int f = 0; f < numComp; f++) {
1088 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1089 validScores[rScores[f][i]] = rScores[f][i];
1090 map<double,double>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1091 if (it != rScoreFreq[f].end()) {
1092 rScoreFreq[f][rScores[f][i]]++;
1094 rScoreFreq[f][rScores[f][i]] = 1;
1100 for(int a = 0; a < numComp; a++) {
1101 float rcumul = 1.0000;
1102 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1103 for (map<double,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1104 //make rscoreFreq map and rCumul
1105 map<double,double>::iterator it2 = rScoreFreq[a].find(it->first);
1106 rCumul[a][it->first] = rcumul;
1107 //get percentage of random trees with that info
1108 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1109 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1114 catch(exception& e) {
1115 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1119 /***********************************************************/