2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> UnifracUnweightedCommand::getValidParameters(){
15 string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "UnifracUnweightedCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 UnifracUnweightedCommand::UnifracUnweightedCommand(){
27 globaldata = GlobalData::getInstance();
28 abort = true; calledHelp = true;
29 vector<string> tempOutNames;
30 outputTypes["unweighted"] = tempOutNames;
31 outputTypes["uwsummary"] = tempOutNames;
32 outputTypes["phylip"] = tempOutNames;
33 outputTypes["column"] = tempOutNames;
36 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
40 //**********************************************************************************************************************
41 vector<string> UnifracUnweightedCommand::getRequiredParameters(){
43 vector<string> myArray;
47 m->errorOut(e, "UnifracUnweightedCommand", "getRequiredParameters");
51 //**********************************************************************************************************************
52 vector<string> UnifracUnweightedCommand::getRequiredFiles(){
54 string Array[] = {"tree","group"};
55 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
60 m->errorOut(e, "UnifracUnweightedCommand", "getRequiredFiles");
64 /***********************************************************/
65 UnifracUnweightedCommand::UnifracUnweightedCommand(string option) {
67 globaldata = GlobalData::getInstance();
68 abort = false; calledHelp = false;
71 //allow user to run help
72 if(option == "help") { help(); abort = true; calledHelp = true; }
75 //valid paramters for this command
76 string Array[] = {"groups","iters","distance","random","root", "processors","outputdir","inputdir"};
77 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
79 OptionParser parser(option);
80 map<string,string> parameters = parser.getParameters();
82 ValidParameters validParameter;
84 //check to make sure all parameters are valid for command
85 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
86 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
89 //initialize outputTypes
90 vector<string> tempOutNames;
91 outputTypes["unweighted"] = tempOutNames;
92 outputTypes["uwsummary"] = tempOutNames;
93 outputTypes["phylip"] = tempOutNames;
94 outputTypes["column"] = tempOutNames;
96 if (globaldata->gTree.size() == 0) {//no trees were read
97 m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.unweighted command."); m->mothurOutEndLine(); abort = true; }
99 //if the user changes the output directory command factory will send this info to us in the output parameter
100 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
102 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
105 //check for optional parameter and set defaults
106 // ...at some point should added some additional type checking...
107 groups = validParameter.validFile(parameters, "groups", false);
108 if (groups == "not found") { groups = ""; }
110 m->splitAtDash(groups, Groups);
111 globaldata->Groups = Groups;
114 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
115 convert(itersString, iters);
117 string temp = validParameter.validFile(parameters, "distance", false);
118 if (temp == "not found") { phylip = false; outputForm = ""; }
120 if ((temp == "lt") || (temp == "column") || (temp == "square")) { phylip = true; outputForm = temp; }
121 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
124 temp = validParameter.validFile(parameters, "random", false); if (temp == "not found") { temp = "f"; }
125 random = m->isTrue(temp);
127 temp = validParameter.validFile(parameters, "root", false); if (temp == "not found") { temp = "F"; }
128 includeRoot = m->isTrue(temp);
130 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
131 convert(temp, processors);
133 if (!random) { iters = 0; } //turn off random calcs
135 //if user selects distance = true and no groups it won't calc the pairwise
136 if ((phylip) && (Groups.size() == 0)) {
138 m->splitAtDash(groups, Groups);
139 globaldata->Groups = Groups;
142 if (abort == false) {
143 T = globaldata->gTree;
144 tmap = globaldata->gTreemap;
145 sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".uwsummary";
146 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
147 m->openOutputFile(sumFile, outSum);
149 util = new SharedUtil();
150 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
151 util->getCombos(groupComb, globaldata->Groups, numComp);
153 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
155 unweighted = new Unweighted(tmap, includeRoot);
162 catch(exception& e) {
163 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
168 //**********************************************************************************************************************
170 void UnifracUnweightedCommand::help(){
172 m->mothurOut("The unifrac.unweighted command can only be executed after a successful read.tree command.\n");
173 m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors, root and random. No parameters are required.\n");
174 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n");
175 m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
176 m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n");
177 m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n");
178 m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n");
179 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
180 m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n");
181 m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n");
182 m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
183 m->mothurOut("The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n");
184 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
186 catch(exception& e) {
187 m->errorOut(e, "UnifracUnweightedCommand", "help");
193 /***********************************************************/
194 int UnifracUnweightedCommand::execute() {
197 if (abort == true) { if (calledHelp) { return 0; } return 2; }
199 int start = time(NULL);
201 userData.resize(numComp,0); //data[0] = unweightedscore
202 randomData.resize(numComp,0); //data[0] = unweightedscore
203 //create new tree with same num nodes and leaves as users
205 if (numComp < processors) { processors = numComp; }
207 outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t' << "UWSig" << endl;
208 m->mothurOut("Tree#\tGroups\tUWScore\tUWSig"); m->mothurOutEndLine();
210 //get pscores for users trees
211 for (int i = 0; i < T.size(); i++) {
212 if (m->control_pressed) {
214 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
221 output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted", itersString);
222 outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted");
223 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted");
227 //get unweighted for users tree
228 rscoreFreq.resize(numComp);
229 rCumul.resize(numComp);
230 utreeScores.resize(numComp);
231 UWScoreSig.resize(numComp);
233 userData = unweighted->getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
235 if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }return 0; }
237 //output scores for each combination
238 for(int k = 0; k < numComp; k++) {
240 utreeScores[k].push_back(userData[k]);
242 //add users score to validscores
243 validScores[userData[k]] = userData[k];
246 //get unweighted scores for random trees - if random is false iters = 0
247 for (int j = 0; j < iters; j++) {
249 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
250 randomData = unweighted->getValues(T[i], "", "", processors, outputDir);
252 if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
254 for(int k = 0; k < numComp; k++) {
255 //add trees unweighted score to map of scores
256 map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
257 if (it != rscoreFreq[k].end()) {//already have that score
258 rscoreFreq[k][randomData[k]]++;
259 }else{//first time we have seen this score
260 rscoreFreq[k][randomData[k]] = 1;
263 //add randoms score to validscores
264 validScores[randomData[k]] = randomData[k];
268 // m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
271 for(int a = 0; a < numComp; a++) {
272 float rcumul = 1.0000;
275 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
276 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
277 //make rscoreFreq map and rCumul
278 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
279 rCumul[a][it->first] = rcumul;
280 //get percentage of random trees with that info
281 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
282 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
284 UWScoreSig[a].push_back(rCumul[a][userData[a]]);
285 }else { UWScoreSig[a].push_back(0.0); }
289 if (m->control_pressed) { if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
292 printUWSummaryFile(i);
293 if (random) { printUnweightedFile(); delete output; }
294 if (phylip) { createPhylipFile(i); }
306 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
308 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
310 m->mothurOutEndLine();
311 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
312 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
313 m->mothurOutEndLine();
318 catch(exception& e) {
319 m->errorOut(e, "UnifracUnweightedCommand", "execute");
323 /***********************************************************/
324 void UnifracUnweightedCommand::printUnweightedFile() {
329 tags.push_back("Score");
330 tags.push_back("RandFreq"); tags.push_back("RandCumul");
332 for(int a = 0; a < numComp; a++) {
333 output->initFile(groupComb[a], tags);
335 for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) {
336 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
337 output->output(data);
343 catch(exception& e) {
344 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
349 /***********************************************************/
350 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
354 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
358 for(int a = 0; a < numComp; a++) {
359 outSum << i+1 << '\t';
360 m->mothurOut(toString(i+1) + "\t");
363 if (UWScoreSig[a][0] > (1/(float)iters)) {
364 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
365 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
366 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t" + toString(UWScoreSig[a][0])+ "\n");
368 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
369 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
370 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t<" + toString((1/float(iters))) + "\n");
373 outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl;
374 cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl;
375 m->mothurOutJustToLog(groupComb[a] + "\t" + toString(utreeScores[a][0]) + "\t0.00\n");
380 catch(exception& e) {
381 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
385 /***********************************************************/
386 void UnifracUnweightedCommand::createPhylipFile(int i) {
388 string phylipFileName;
389 if ((outputForm == "lt") || (outputForm == "square")) {
390 phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted.phylip.dist";
391 outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName);
393 phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile()) + toString(i+1) + ".unweighted.column.dist";
394 outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName);
398 m->openOutputFile(phylipFileName, out);
400 if ((outputForm == "lt") || (outputForm == "square")) {
402 out << globaldata->Groups.size() << endl;
405 //make matrix with scores in it
406 vector< vector<float> > dists; dists.resize(globaldata->Groups.size());
407 for (int i = 0; i < globaldata->Groups.size(); i++) {
408 dists[i].resize(globaldata->Groups.size(), 0.0);
411 //flip it so you can print it
413 for (int r=0; r<globaldata->Groups.size(); r++) {
414 for (int l = 0; l < r; l++) {
415 dists[r][l] = utreeScores[count][0];
416 dists[l][r] = utreeScores[count][0];
422 for (int r=0; r<globaldata->Groups.size(); r++) {
424 string name = globaldata->Groups[r];
425 if (name.length() < 10) { //pad with spaces to make compatible
426 while (name.length() < 10) { name += " "; }
429 if (outputForm == "lt") {
433 for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
435 }else if (outputForm == "square") {
439 for (int l = 0; l < globaldata->Groups.size(); l++) { out << dists[r][l] << '\t'; }
443 for (int l = 0; l < r; l++) {
444 string otherName = globaldata->Groups[l];
445 if (otherName.length() < 10) { //pad with spaces to make compatible
446 while (otherName.length() < 10) { otherName += " "; }
449 out << name << '\t' << otherName << dists[r][l] << endl;
455 catch(exception& e) {
456 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
460 /***********************************************************/