2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> UnifracUnweightedCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
22 CommandParameter prandom("random", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prandom);
23 CommandParameter pdistance("distance", "Multiple", "column-lt-square", "column", "", "", "",false,false); parameters.push_back(pdistance);
24 CommandParameter proot("root", "Boolean", "F", "", "", "", "",false,false); parameters.push_back(proot);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "UnifracUnweightedCommand", "setParameters");
37 //**********************************************************************************************************************
38 string UnifracUnweightedCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
42 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
43 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
44 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
45 helpString += "The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n";
46 helpString += "The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n";
47 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
48 helpString += "The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n";
49 helpString += "Example unifrac.unweighted(groups=A-B-C, iters=500).\n";
50 helpString += "The default value for groups is all the groups in your groupfile, and iters is 1000.\n";
51 helpString += "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n";
52 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
56 m->errorOut(e, "UnifracUnweightedCommand", "getHelpString");
60 //**********************************************************************************************************************
61 UnifracUnweightedCommand::UnifracUnweightedCommand(){
63 abort = true; calledHelp = true;
65 vector<string> tempOutNames;
66 outputTypes["unweighted"] = tempOutNames;
67 outputTypes["uwsummary"] = tempOutNames;
68 outputTypes["phylip"] = tempOutNames;
69 outputTypes["column"] = tempOutNames;
72 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
76 /***********************************************************/
77 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
79 abort = false; calledHelp = false;
82 //allow user to run help
83 if(option == "help") { help(); abort = true; calledHelp = true; }
84 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87 vector<string> myArray = setParameters();
89 OptionParser parser(option);
90 map<string,string> parameters = parser.getParameters();
91 map<string,string>::iterator it;
93 ValidParameters validParameter;
95 //check to make sure all parameters are valid for command
96 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
97 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["unweighted"] = tempOutNames;
103 outputTypes["uwsummary"] = tempOutNames;
104 outputTypes["phylip"] = tempOutNames;
105 outputTypes["column"] = tempOutNames;
107 //if the user changes the input directory command factory will send this info to us in the output parameter
108 string inputDir = validParameter.validFile(parameters, "inputdir", false);
109 if (inputDir == "not found"){ inputDir = ""; }
112 it = parameters.find("tree");
113 //user has given a template file
114 if(it != parameters.end()){
115 path = m->hasPath(it->second);
116 //if the user has not given a path then, add inputdir. else leave path alone.
117 if (path == "") { parameters["tree"] = inputDir + it->second; }
120 it = parameters.find("group");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["group"] = inputDir + it->second; }
128 it = parameters.find("name");
129 //user has given a template file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["name"] = inputDir + it->second; }
137 //check for required parameters
138 treefile = validParameter.validFile(parameters, "tree", true);
139 if (treefile == "not open") { abort = true; }
140 else if (treefile == "not found") { //if there is a current design file, use it
141 treefile = m->getTreeFile();
142 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
143 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
144 }else { m->setTreeFile(treefile); }
146 //check for required parameters
147 groupfile = validParameter.validFile(parameters, "group", true);
148 if (groupfile == "not open") { abort = true; }
149 else if (groupfile == "not found") { groupfile = ""; }
150 else { m->setGroupFile(groupfile); }
152 namefile = validParameter.validFile(parameters, "name", true);
153 if (namefile == "not open") { namefile = ""; abort = true; }
154 else if (namefile == "not found") { namefile = ""; }
155 else { m->setNameFile(namefile); }
157 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
159 //check for optional parameter and set defaults
160 // ...at some point should added some additional type checking...
161 groups = validParameter.validFile(parameters, "groups", false);
162 if (groups == "not found") { groups = ""; }
164 m->splitAtDash(groups, Groups);
165 m->setGroups(Groups);
168 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
169 m->mothurConvert(itersString, iters);
171 string temp = validParameter.validFile(parameters, "distance", false);
172 if (temp == "not found") { phylip = false; outputForm = ""; }
174 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
175 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
178 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
179 random = m->isTrue(temp);
181 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
182 includeRoot = m->isTrue(temp);
184 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
185 m->setProcessors(temp);
186 m->mothurConvert(temp, processors);
188 if (!random) { iters = 0; } //turn off random calcs
190 //if user selects distance = true and no groups it won't calc the pairwise
191 if ((phylip) && (Groups.size() == 0)) {
193 m->splitAtDash(groups, Groups);
194 m->setGroups(Groups);
197 if (namefile == "") {
198 vector<string> files; files.push_back(treefile);
199 parser.getNameFile(files);
204 catch(exception& e) {
205 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
210 /***********************************************************/
211 int UnifracUnweightedCommand::execute() {
214 if (abort == true) { if (calledHelp) { return 0; } return 2; }
216 m->setTreeFile(treefile);
218 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
219 T = reader->getTrees();
220 tmap = T[0]->getTreeMap();
221 map<string, string> nameMap = reader->getNameMap();
224 sumFile = outputDir + m->getSimpleName(treefile) + ".uwsummary";
225 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
226 m->openOutputFile(sumFile, outSum);
229 Groups = m->getGroups();
230 vector<string> namesGroups = tmap->getNamesOfGroups();
231 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
232 util.getCombos(groupComb, Groups, numComp);
233 m->setGroups(Groups);
235 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
237 Unweighted unweighted(includeRoot);
239 int start = time(NULL);
241 userData.resize(numComp,0); //data[0] = unweightedscore
242 randomData.resize(numComp,0); //data[0] = unweightedscore
243 //create new tree with same num nodes and leaves as users
245 if (numComp < processors) { processors = numComp; }
247 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t';
248 m->mothurOut("Tree#\tGroups\tUWScore\t");
249 if (random) { outSum << "UWSig"; m->mothurOut("UWSig"); }
250 outSum << endl; m->mothurOutEndLine();
252 //get pscores for users trees
253 for (int i = 0; i < T.size(); i++) {
254 if (m->control_pressed) {
256 for (int i = 0; i < T.size(); i++) { delete T[i]; }
258 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
265 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted", itersString);
266 outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
267 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted");
271 //get unweighted for users tree
272 rscoreFreq.resize(numComp);
273 rCumul.resize(numComp);
274 utreeScores.resize(numComp);
275 UWScoreSig.resize(numComp);
277 userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
279 if (m->control_pressed) { delete tmap;
280 for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
282 //output scores for each combination
283 for(int k = 0; k < numComp; k++) {
285 utreeScores[k].push_back(userData[k]);
287 //add users score to validscores
288 validScores[userData[k]] = userData[k];
291 //get unweighted scores for random trees - if random is false iters = 0
292 for (int j = 0; j < iters; j++) {
294 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
295 randomData = unweighted.getValues(T[i], "", "", processors, outputDir);
297 if (m->control_pressed) { delete tmap;
298 for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
300 for(int k = 0; k < numComp; k++) {
301 //add trees unweighted score to map of scores
302 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
303 if (it != rscoreFreq[k].end()) {//already have that score
304 rscoreFreq[k][randomData[k]]++;
305 }else{//first time we have seen this score
306 rscoreFreq[k][randomData[k]] = 1;
309 //add randoms score to validscores
310 validScores[randomData[k]] = randomData[k];
314 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
317 for(int a = 0; a < numComp; a++) {
318 float rcumul = 1.0000;
321 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
322 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
323 //make rscoreFreq map and rCumul
324 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
325 rCumul[a][it->first] = rcumul;
326 //get percentage of random trees with that info
327 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
328 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
330 UWScoreSig[a].push_back(rCumul[a][userData[a]]);
331 }else { UWScoreSig[a].push_back(0.0); }
335 if (m->control_pressed) { delete tmap;
336 for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
339 printUWSummaryFile(i);
340 if (random) { printUnweightedFile(); delete output; }
341 if (phylip) { createPhylipFile(i); }
353 for (int i = 0; i < T.size(); i++) { delete T[i]; }
355 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
357 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
359 //set phylip file as new current phylipfile
361 itTypes = outputTypes.find("phylip");
362 if (itTypes != outputTypes.end()) {
363 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
366 //set column file as new current columnfile
367 itTypes = outputTypes.find("column");
368 if (itTypes != outputTypes.end()) {
369 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
372 m->mothurOutEndLine();
373 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
374 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
375 m->mothurOutEndLine();
380 catch(exception& e) {
381 m->errorOut(e, "UnifracUnweightedCommand", "execute");
385 /***********************************************************/
386 void UnifracUnweightedCommand::printUnweightedFile() {
391 tags.push_back("Score");
392 tags.push_back("RandFreq"); tags.push_back("RandCumul");
394 for(int a = 0; a < numComp; a++) {
395 output->initFile(groupComb[a], tags);
397 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
398 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
399 output->output(data);
405 catch(exception& e) {
406 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
411 /***********************************************************/
412 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
416 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
420 for(int a = 0; a < numComp; a++) {
421 outSum << i+1 << '\t';
422 m->mothurOut(toString(i+1) + "\t");
425 if (UWScoreSig[a][0] > (1/(float)iters)) {
426 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
427 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
428 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
430 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
431 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
432 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
435 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
436 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << endl;
437 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\n");
442 catch(exception& e) {
443 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
447 /***********************************************************/
448 void UnifracUnweightedCommand::createPhylipFile(int i) {
450 string phylipFileName;
451 if ((outputForm == "lt") || (outputForm == "square")) {
452 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.phylip.dist";
453 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
455 phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".unweighted.column.dist";
456 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
460 m->openOutputFile(phylipFileName, out);
462 if ((outputForm == "lt") || (outputForm == "square")) {
464 out << m->getNumGroups() << endl;
467 //make matrix with scores in it
468 vector< vector<float> > dists; dists.resize(m->getNumGroups());
469 for (int i = 0; i < m->getNumGroups(); i++) {
470 dists[i].resize(m->getNumGroups(), 0.0);
473 //flip it so you can print it
475 for (int r=0; r<m->getNumGroups(); r++) {
476 for (int l = 0; l < r; l++) {
477 dists[r][l] = utreeScores[count][0];
478 dists[l][r] = utreeScores[count][0];
484 for (int r=0; r<m->getNumGroups(); r++) {
486 string name = (m->getGroups())[r];
487 if (name.length() < 10) { //pad with spaces to make compatible
488 while (name.length() < 10) { name += " "; }
491 if (outputForm == "lt") {
495 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
497 }else if (outputForm == "square") {
501 for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; }
505 for (int l = 0; l < r; l++) {
506 string otherName = (m->getGroups())[l];
507 if (otherName.length() < 10) { //pad with spaces to make compatible
508 while (otherName.length() < 10) { otherName += " "; }
511 out << name << '\t' << otherName << dists[r][l] << endl;
517 catch(exception& e) {
518 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
522 /***********************************************************/