2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
11 #include "consensus.h"
12 #include "subsample.h"
13 #include "treereader.h"
15 //**********************************************************************************************************************
16 vector<string> UnifracWeightedCommand::setParameters(){
18 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","weighted-wsummary",false,true,true); parameters.push_back(ptree);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
22 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
26 CommandParameter pconsensus("consensus", "Boolean", "", "F", "", "", "","tree",false,false); parameters.push_back(pconsensus);
27 CommandParameter prandom("random", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prandom);
28 CommandParameter pdistance("distance", "Multiple", "column-lt-square-phylip", "column", "", "", "","phylip-column",false,false); parameters.push_back(pdistance);
29 CommandParameter proot("root", "Boolean", "F", "", "", "", "","",false,false); parameters.push_back(proot);
30 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
31 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33 vector<string> myArray;
34 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
38 m->errorOut(e, "UnifracWeightedCommand", "setParameters");
42 //**********************************************************************************************************************
43 string UnifracWeightedCommand::getHelpString(){
45 string helpString = "";
46 helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
48 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
49 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
50 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n";
51 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
52 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
53 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group. The subsample parameter may only be used with a group file.\n";
54 helpString += "The consensus parameter allows you to indicate you would like trees built from distance matrices created with the results, as well as a consensus tree built from these trees. Default=F.\n";
55 helpString += "The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n";
56 helpString += "Example unifrac.weighted(groups=A-B-C, iters=500).\n";
57 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
58 helpString += "The unifrac.weighted command output two files: .weighted and .wsummary their descriptions are in the manual.\n";
59 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
63 m->errorOut(e, "UnifracWeightedCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string UnifracWeightedCommand::getOutputPattern(string type) {
71 if (type == "weighted") { pattern = "[filename],weighted-[filename],[tag],weighted"; }
72 else if (type == "wsummary") { pattern = "[filename],wsummary"; }
73 else if (type == "phylip") { pattern = "[filename],[tag],[tag2],dist"; }
74 else if (type == "column") { pattern = "[filename],[tag],[tag2],dist"; }
75 else if (type == "tree") { pattern = "[filename],[tag],[tag2],tre"; }
76 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
81 m->errorOut(e, "UnifracWeightedCommand", "getOutputPattern");
85 //**********************************************************************************************************************
86 UnifracWeightedCommand::UnifracWeightedCommand(){
88 abort = true; calledHelp = true;
90 vector<string> tempOutNames;
91 outputTypes["weighted"] = tempOutNames;
92 outputTypes["wsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
95 outputTypes["tree"] = tempOutNames;
98 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
103 /***********************************************************/
104 UnifracWeightedCommand::UnifracWeightedCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters=parser.getParameters();
117 map<string,string>::iterator it;
119 ValidParameters validParameter;
121 //check to make sure all parameters are valid for command
122 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["weighted"] = tempOutNames;
129 outputTypes["wsummary"] = tempOutNames;
130 outputTypes["phylip"] = tempOutNames;
131 outputTypes["column"] = tempOutNames;
132 outputTypes["tree"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("tree");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["tree"] = inputDir + it->second; }
147 it = parameters.find("group");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["group"] = inputDir + it->second; }
155 it = parameters.find("name");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["name"] = inputDir + it->second; }
163 it = parameters.find("count");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["count"] = inputDir + it->second; }
172 //check for required parameters
173 treefile = validParameter.validFile(parameters, "tree", true);
174 if (treefile == "not open") { treefile = ""; abort = true; }
175 else if (treefile == "not found") { //if there is a current design file, use it
176 treefile = m->getTreeFile();
177 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
178 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
179 }else { m->setTreeFile(treefile); }
181 //check for required parameters
182 groupfile = validParameter.validFile(parameters, "group", true);
183 if (groupfile == "not open") { abort = true; }
184 else if (groupfile == "not found") { groupfile = ""; }
185 else { m->setGroupFile(groupfile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { namefile = ""; abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 countfile = validParameter.validFile(parameters, "count", true);
193 if (countfile == "not open") { countfile = ""; abort = true; }
194 else if (countfile == "not found") { countfile = ""; }
195 else { m->setCountTableFile(countfile); }
197 if ((namefile != "") && (countfile != "")) {
198 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
201 if ((groupfile != "") && (countfile != "")) {
202 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
205 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
208 //check for optional parameter and set defaults
209 // ...at some point should added some additional type checking...
210 groups = validParameter.validFile(parameters, "groups", false);
211 if (groups == "not found") { groups = ""; }
213 m->splitAtDash(groups, Groups);
214 m->setGroups(Groups);
217 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
218 m->mothurConvert(itersString, iters);
220 string temp = validParameter.validFile(parameters, "distance", false);
221 if (temp == "not found") { phylip = false; outputForm = ""; }
223 if (temp=="phylip") { temp = "lt"; }
224 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
225 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
228 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "F"; }
229 random = m->isTrue(temp);
231 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
232 includeRoot = m->isTrue(temp);
234 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
235 m->setProcessors(temp);
236 m->mothurConvert(temp, processors);
238 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
239 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
241 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
242 else { subsample = false; }
245 if (!subsample) { subsampleIters = 0; }
246 else { subsampleIters = iters; }
248 temp = validParameter.validFile(parameters, "consensus", false); if (temp == "not found") { temp = "F"; }
249 consensus = m->isTrue(temp);
251 if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
252 if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
255 if ((!testCt.testGroups(countfile)) && (subsample)) {
256 m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
259 if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
260 if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
263 if (namefile == "") {
264 vector<string> files; files.push_back(treefile);
265 parser.getNameFile(files);
272 catch(exception& e) {
273 m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
277 /***********************************************************/
278 int UnifracWeightedCommand::execute() {
281 if (abort == true) { if (calledHelp) { return 0; } return 2; }
283 m->setTreeFile(treefile);
286 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
287 else { reader = new TreeReader(treefile, countfile); }
288 T = reader->getTrees();
289 ct = T[0]->getCountTable();
292 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
294 map<string, string> variables;
295 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
296 sumFile = getOutputFileName("wsummary",variables);
297 m->openOutputFile(sumFile, outSum);
298 outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile);
301 string s; //to make work with setgroups
302 Groups = m->getGroups();
303 vector<string> nameGroups = ct->getNamesOfGroups();
304 if (nameGroups.size() < 2) { m->mothurOut("[ERROR]: You cannot run unifrac.weighted with less than 2 groups, aborting.\n"); delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
305 util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
306 m->setGroups(Groups);
308 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
310 Weighted weighted(includeRoot);
312 int start = time(NULL);
316 //user has not set size, set size = smallest samples size
317 if (subsampleSize == -1) {
318 vector<string> temp; temp.push_back(Groups[0]);
319 subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
320 for (int i = 1; i < Groups.size(); i++) {
321 int thisSize = ct->getGroupCount(Groups[i]);
322 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
324 m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
325 }else { //eliminate any too small groups
326 vector<string> newGroups = Groups;
328 for (int i = 0; i < newGroups.size(); i++) {
329 int thisSize = ct->getGroupCount(newGroups[i]);
331 if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
332 else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
334 m->setGroups(Groups);
338 //here in case some groups are removed by subsample
339 util.getCombos(groupComb, Groups, numComp);
341 if (numComp < processors) { processors = numComp; }
343 if (consensus && (numComp < 2)) { m->mothurOut("consensus can only be used with numComparisions greater than 1, setting consensus=f.\n"); consensus=false; }
345 //get weighted scores for users trees
346 for (int i = 0; i < T.size(); i++) {
348 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
351 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
352 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
354 vector<double> userData; userData.resize(numComp,0); //weighted score info for user tree. data[0] = weightedscore AB, data[1] = weightedscore AC...
355 vector<double> randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC...
358 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
359 variables["[tag]"] = toString(i+1);
360 string wFileName = getOutputFileName("weighted", variables);
361 output = new ColumnFile(wFileName, itersString);
362 outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
365 userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
366 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
369 for (int s=0; s<numComp; s++) {
370 //add users score to vector of user scores
371 uScores[s].push_back(userData[s]);
372 //save users tree score for summary file
373 utreeScores.push_back(userData[s]);
376 if (random) { runRandomCalcs(T[i], userData); }
384 vector< vector<double> > calcDistsTotals; //each iter, each groupCombos dists. this will be used to make .dist files
385 for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //subsampleIters=0, if subsample=f.
386 if (m->control_pressed) { break; }
388 //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
389 CountTable* newCt = new CountTable();
392 if (m->debug) { sampleTime = time(NULL); }
394 //uses method of setting groups to doNotIncludeMe
396 Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
398 if (m->debug) { m->mothurOut("[DEBUG]: iter " + toString(thisIter) + " took " + toString(time(NULL) - sampleTime) + " seconds to sample tree.\n"); }
400 //call new weighted function
401 vector<double> iterData; iterData.resize(numComp,0);
402 Weighted thisWeighted(includeRoot);
403 iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
405 //save data to make ave dist, std dist
406 calcDistsTotals.push_back(iterData);
409 delete subSampleTree;
411 if((thisIter+1) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter+1)+"\n"); }
414 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
416 if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
417 if (consensus) { getConsensusTrees(calcDistsTotals, i); }
421 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
423 if (phylip) { createPhylipFile(); }
427 //clear out users groups
430 for (int i = 0; i < T.size(); i++) { delete T[i]; }
432 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
434 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
436 //set phylip file as new current phylipfile
438 itTypes = outputTypes.find("phylip");
439 if (itTypes != outputTypes.end()) {
440 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
443 //set column file as new current columnfile
444 itTypes = outputTypes.find("column");
445 if (itTypes != outputTypes.end()) {
446 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
449 m->mothurOutEndLine();
450 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
451 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
452 m->mothurOutEndLine();
457 catch(exception& e) {
458 m->errorOut(e, "UnifracWeightedCommand", "execute");
462 /**************************************************************************************************/
463 int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dists, int treeNum) {
465 //we need to find the average distance and standard deviation for each groups distance
468 vector<double> averages = m->getAverages(dists);
470 //find standard deviation
471 vector<double> stdDev = m->getStandardDeviation(dists, averages);
473 //make matrix with scores in it
474 vector< vector<double> > avedists; //avedists.resize(m->getNumGroups());
475 for (int i = 0; i < m->getNumGroups(); i++) {
477 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
478 avedists.push_back(temp);
481 //make matrix with scores in it
482 vector< vector<double> > stddists; //stddists.resize(m->getNumGroups());
483 for (int i = 0; i < m->getNumGroups(); i++) {
485 for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
486 //stddists[i].resize(m->getNumGroups(), 0.0);
487 stddists.push_back(temp);
491 //flip it so you can print it
493 for (int r=0; r<m->getNumGroups(); r++) {
494 for (int l = 0; l < r; l++) {
495 avedists[r][l] = averages[count];
496 avedists[l][r] = averages[count];
497 stddists[r][l] = stdDev[count];
498 stddists[l][r] = stdDev[count];
503 map<string, string> variables;
504 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
505 variables["[tag]"] = toString(treeNum+1);
506 variables["[tag2]"] = "weighted.ave";
507 string aveFileName = getOutputFileName("phylip",variables);
508 if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); }
509 else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); }
511 m->openOutputFile(aveFileName, out);
513 variables["[tag2]"] = "weighted.std";
514 string stdFileName = getOutputFileName("phylip",variables);
515 if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); }
516 else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); }
518 m->openOutputFile(stdFileName, outStd);
520 if ((outputForm == "lt") || (outputForm == "square")) {
522 out << m->getNumGroups() << endl;
523 outStd << m->getNumGroups() << endl;
527 for (int r=0; r<m->getNumGroups(); r++) {
529 string name = (m->getGroups())[r];
530 if (name.length() < 10) { //pad with spaces to make compatible
531 while (name.length() < 10) { name += " "; }
534 if (outputForm == "lt") {
536 outStd << name << '\t';
539 for (int l = 0; l < r; l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t';}
540 out << endl; outStd << endl;
541 }else if (outputForm == "square") {
543 outStd << name << '\t';
546 for (int l = 0; l < m->getNumGroups(); l++) { out << avedists[r][l] << '\t'; outStd << stddists[r][l] << '\t'; }
547 out << endl; outStd << endl;
550 for (int l = 0; l < r; l++) {
551 string otherName = (m->getGroups())[l];
552 if (otherName.length() < 10) { //pad with spaces to make compatible
553 while (otherName.length() < 10) { otherName += " "; }
556 out << name << '\t' << otherName << avedists[r][l] << endl;
557 outStd << name << '\t' << otherName << stddists[r][l] << endl;
566 catch(exception& e) {
567 m->errorOut(e, "UnifracWeightedCommand", "getAverageSTDMatrices");
572 /**************************************************************************************************/
573 int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, int treeNum) {
576 //used in tree constructor
579 ///create treemap class from groupmap for tree class to use
582 map<string, string> groupMap;
584 for (int i = 0; i < m->getGroups().size(); i++) {
585 nameMap.insert(m->getGroups()[i]);
586 gps.insert(m->getGroups()[i]);
587 groupMap[m->getGroups()[i]] = m->getGroups()[i];
589 newCt.createTable(nameMap, groupMap, gps);
591 //clear old tree names if any
592 m->Treenames.clear();
594 //fills globaldatas tree names
595 m->Treenames = m->getGroups();
597 vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
599 if (m->control_pressed) { return 0; }
602 Tree* conTree = con.getTree(newTrees);
604 //create a new filename
605 map<string, string> variables;
606 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
607 variables["[tag]"] = toString(treeNum+1);
608 variables["[tag2]"] = "weighted.cons";
609 string conFile = getOutputFileName("tree",variables);
610 outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile);
612 m->openOutputFile(conFile, outTree);
614 if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; }
620 catch(exception& e) {
621 m->errorOut(e, "UnifracWeightedCommand", "getConsensusTrees");
625 /**************************************************************************************************/
627 vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
632 //create a new filename
633 map<string, string> variables;
634 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(treefile));
635 variables["[tag]"] = toString(treeNum+1);
636 variables["[tag2]"] = "weighted.all";
637 string outputFile = getOutputFileName("tree",variables);
638 outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile);
641 m->openOutputFile(outputFile, outAll);
644 for (int i = 0; i < dists.size(); i++) { //dists[0] are the dists for the first subsampled tree.
646 if (m->control_pressed) { break; }
648 //make matrix with scores in it
649 vector< vector<double> > sims; sims.resize(m->getNumGroups());
650 for (int j = 0; j < m->getNumGroups(); j++) {
651 sims[j].resize(m->getNumGroups(), 0.0);
655 for (int r=0; r<m->getNumGroups(); r++) {
656 for (int l = 0; l < r; l++) {
657 double sim = -(dists[i][count]-1.0);
665 Tree* tempTree = new Tree(&myct, sims);
666 tempTree->assembleTree();
668 trees.push_back(tempTree);
671 tempTree->print(outAll);
676 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; trees[i] = NULL; } m->mothurRemove(outputFile); }
680 catch(exception& e) {
681 m->errorOut(e, "UnifracWeightedCommand", "buildTrees");
685 /**************************************************************************************************/
687 int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersScores) {
690 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
691 vector< vector<string> > namesOfGroupCombos;
692 for (int a=0; a<numGroups; a++) {
693 for (int l = 0; l < a; l++) {
694 vector<string> groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]);
695 namesOfGroupCombos.push_back(groups);
701 //breakdown work between processors
702 int numPairs = namesOfGroupCombos.size();
703 int numPairsPerProcessor = numPairs / processors;
705 for (int i = 0; i < processors; i++) {
706 int startPos = i * numPairsPerProcessor;
707 if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
708 lines.push_back(linePair(startPos, numPairsPerProcessor));
712 //get scores for random trees
713 for (int j = 0; j < iters; j++) {
714 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
715 //if(processors == 1){
716 // driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
718 createProcesses(thisTree, namesOfGroupCombos, rScores);
721 //driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
725 if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
730 //find the signifigance of the score for summary file
731 for (int f = 0; f < numComp; f++) {
733 sort(rScores[f].begin(), rScores[f].end());
735 //the index of the score higher than yours is returned
736 //so if you have 1000 random trees the index returned is 100
737 //then there are 900 trees with a score greater then you.
738 //giving you a signifigance of 0.900
739 int index = findIndex(usersScores[f], f); if (index == -1) { m->mothurOut("error in UnifracWeightedCommand"); m->mothurOutEndLine(); exit(1); } //error code
741 //the signifigance is the number of trees with the users score or higher
742 WScoreSig.push_back((iters-index)/(float)iters);
745 //out << "Tree# " << i << endl;
746 calculateFreqsCumuls();
753 catch(exception& e) {
754 m->errorOut(e, "UnifracWeightedCommand", "runRandomCalcs");
758 /**************************************************************************************************/
760 int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, vector< vector<double> >& scores) {
763 vector<int> processIDS;
766 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
767 //loop through and create all the processes you want
768 while (process != processors) {
772 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
775 driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, scores);
777 //pass numSeqs to parent
779 string tempFile = outputDir + toString(getpid()) + ".weightedcommand.results.temp";
780 m->openOutputFile(tempFile, out);
781 for (int i = lines[process].start; i < (lines[process].start + lines[process].num); i++) { out << scores[i][(scores[i].size()-1)] << '\t'; } out << endl;
786 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
787 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
792 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
794 //force parent to wait until all the processes are done
795 for (int i=0;i<(processors-1);i++) {
796 int temp = processIDS[i];
800 //get data created by processes
801 for (int i=0;i<(processors-1);i++) {
804 string s = outputDir + toString(processIDS[i]) + ".weightedcommand.results.temp";
805 m->openInputFile(s, in);
808 for (int j = lines[(i+1)].start; j < (lines[(i+1)].start + lines[(i+1)].num); j++) { in >> tempScore; scores[j].push_back(tempScore); }
814 vector<weightedRandomData*> pDataArray;
815 DWORD dwThreadIdArray[processors-1];
816 HANDLE hThreadArray[processors-1];
817 vector<CountTable*> cts;
820 //Create processor worker threads.
821 for( int i=1; i<processors; i++ ){
822 CountTable* copyCount = new CountTable();
824 Tree* copyTree = new Tree(copyCount);
825 copyTree->getCopy(t);
827 cts.push_back(copyCount);
828 trees.push_back(copyTree);
830 vector< vector<double> > copyScores = rScores;
832 weightedRandomData* tempweighted = new weightedRandomData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot, copyScores);
833 pDataArray.push_back(tempweighted);
834 processIDS.push_back(i);
836 hThreadArray[i-1] = CreateThread(NULL, 0, MyWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
839 driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, scores);
841 //Wait until all threads have terminated.
842 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
844 //Close all thread handles and free memory allocations.
845 for(int i=0; i < pDataArray.size(); i++){
846 for (int j = pDataArray[i]->start; j < (pDataArray[i]->start+pDataArray[i]->num); j++) {
847 scores[j].push_back(pDataArray[i]->scores[j][pDataArray[i]->scores[j].size()-1]);
851 CloseHandle(hThreadArray[i]);
852 delete pDataArray[i];
860 catch(exception& e) {
861 m->errorOut(e, "UnifracWeightedCommand", "createProcesses");
866 /**************************************************************************************************/
867 int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
869 Tree* randT = new Tree(ct);
871 Weighted weighted(includeRoot);
873 for (int h = start; h < (start+num); h++) {
875 if (m->control_pressed) { return 0; }
877 //initialize weighted score
878 string groupA = namesOfGroupCombos[h][0];
879 string groupB = namesOfGroupCombos[h][1];
884 //create a random tree with same topology as T[i], but different labels
885 randT->assembleRandomUnifracTree(groupA, groupB);
887 if (m->control_pressed) { delete randT; return 0; }
889 //get wscore of random tree
890 EstOutput randomData = weighted.getValues(randT, groupA, groupB);
892 if (m->control_pressed) { delete randT; return 0; }
895 scores[h].push_back(randomData[0]);
903 catch(exception& e) {
904 m->errorOut(e, "UnifracWeightedCommand", "driver");
908 /***********************************************************/
909 void UnifracWeightedCommand::printWeightedFile() {
913 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
915 for(int a = 0; a < numComp; a++) {
916 output->initFile(groupComb[a], tags);
918 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
919 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
920 output->output(data);
926 catch(exception& e) {
927 m->errorOut(e, "UnifracWeightedCommand", "printWeightedFile");
933 /***********************************************************/
934 void UnifracWeightedCommand::printWSummaryFile() {
937 outSum << "Tree#" << '\t' << "Groups" << '\t' << "WScore" << '\t';
938 m->mothurOut("Tree#\tGroups\tWScore\t");
939 if (random) { outSum << "WSig"; m->mothurOut("WSig"); }
940 outSum << endl; m->mothurOutEndLine();
943 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
947 for (int i = 0; i < T.size(); i++) {
948 for (int j = 0; j < numComp; j++) {
950 if (WScoreSig[count] > (1/(float)iters)) {
951 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
952 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << WScoreSig[count] << endl;
953 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t" + toString(WScoreSig[count]) + "\n");
955 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
956 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
957 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\t<" + toString((1/float(iters))) + "\n");
960 outSum << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
961 cout << setprecision(6) << i+1 << '\t' << groupComb[j] << '\t' << utreeScores[count] << endl;
962 m->mothurOutJustToLog(toString(i+1) +"\t" + groupComb[j] +"\t" + toString(utreeScores[count]) +"\n");
969 catch(exception& e) {
970 m->errorOut(e, "UnifracWeightedCommand", "printWSummaryFile");
974 /***********************************************************/
975 void UnifracWeightedCommand::createPhylipFile() {
979 for (int i = 0; i < T.size(); i++) {
981 string phylipFileName;
982 map<string, string> variables;
983 variables["[filename]"] = outputDir + m->getSimpleName(treefile);
984 variables["[tag]"] = toString(i+1);
985 if ((outputForm == "lt") || (outputForm == "square")) {
986 variables["[tag2]"] = "weighted.phylip";
987 phylipFileName = getOutputFileName("phylip",variables);
988 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
990 variables["[tag2]"] = "weighted.column";
991 phylipFileName = getOutputFileName("column",variables);
992 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
997 m->openOutputFile(phylipFileName, out);
999 if ((outputForm == "lt") || (outputForm == "square")) {
1001 out << m->getNumGroups() << endl;
1004 //make matrix with scores in it
1005 vector< vector<float> > dists; dists.resize(m->getNumGroups());
1006 for (int i = 0; i < m->getNumGroups(); i++) {
1007 dists[i].resize(m->getNumGroups(), 0.0);
1010 //flip it so you can print it
1011 for (int r=0; r<m->getNumGroups(); r++) {
1012 for (int l = 0; l < r; l++) {
1013 dists[r][l] = utreeScores[count];
1014 dists[l][r] = utreeScores[count];
1020 for (int r=0; r<m->getNumGroups(); r++) {
1022 string name = (m->getGroups())[r];
1023 if (name.length() < 10) { //pad with spaces to make compatible
1024 while (name.length() < 10) { name += " "; }
1027 if (outputForm == "lt") {
1028 out << name << '\t';
1031 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
1033 }else if (outputForm == "square") {
1034 out << name << '\t';
1037 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
1041 for (int l = 0; l < r; l++) {
1042 string otherName = (m->getGroups())[l];
1043 if (otherName.length() < 10) { //pad with spaces to make compatible
1044 while (otherName.length() < 10) { otherName += " "; }
1047 out << name << '\t' << otherName << dists[r][l] << endl;
1054 catch(exception& e) {
1055 m->errorOut(e, "UnifracWeightedCommand", "createPhylipFile");
1059 /***********************************************************/
1060 int UnifracWeightedCommand::findIndex(float score, int index) {
1062 for (int i = 0; i < rScores[index].size(); i++) {
1063 if (rScores[index][i] >= score) { return i; }
1065 return rScores[index].size();
1067 catch(exception& e) {
1068 m->errorOut(e, "UnifracWeightedCommand", "findIndex");
1073 /***********************************************************/
1075 void UnifracWeightedCommand::calculateFreqsCumuls() {
1077 //clear out old tree values
1079 rScoreFreq.resize(numComp);
1081 rCumul.resize(numComp);
1082 validScores.clear();
1084 //calculate frequency
1085 for (int f = 0; f < numComp; f++) {
1086 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
1087 validScores[rScores[f][i]] = rScores[f][i];
1088 map<float,float>::iterator it = rScoreFreq[f].find(rScores[f][i]);
1089 if (it != rScoreFreq[f].end()) {
1090 rScoreFreq[f][rScores[f][i]]++;
1092 rScoreFreq[f][rScores[f][i]] = 1;
1098 for(int a = 0; a < numComp; a++) {
1099 float rcumul = 1.0000;
1100 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
1101 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
1102 //make rscoreFreq map and rCumul
1103 map<float,float>::iterator it2 = rScoreFreq[a].find(it->first);
1104 rCumul[a][it->first] = rcumul;
1105 //get percentage of random trees with that info
1106 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
1107 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
1112 catch(exception& e) {
1113 m->errorOut(e, "UnifracWeightedCommand", "calculateFreqsCums");
1117 /***********************************************************/