5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
12 //**********************************************************************************************************************
13 vector<string> ParsimonyCommand::getValidParameters(){
15 string Array[] = {"random","groups","iters","processors","outputdir","inputdir"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "ParsimonyCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 ParsimonyCommand::ParsimonyCommand(){
27 abort = true; calledHelp = true;
28 vector<string> tempOutNames;
29 outputTypes["parsimony"] = tempOutNames;
30 outputTypes["psummary"] = tempOutNames;
33 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
37 //**********************************************************************************************************************
38 vector<string> ParsimonyCommand::getRequiredParameters(){
40 vector<string> myArray;
44 m->errorOut(e, "ParsimonyCommand", "getRequiredParameters");
48 //**********************************************************************************************************************
49 vector<string> ParsimonyCommand::getRequiredFiles(){
51 vector<string> myArray;
55 m->errorOut(e, "ParsimonyCommand", "getRequiredFiles");
59 /***********************************************************/
60 ParsimonyCommand::ParsimonyCommand(string option) {
62 globaldata = GlobalData::getInstance();
63 abort = false; calledHelp = false;
66 //allow user to run help
67 if(option == "help") { help(); abort = true; calledHelp = true; }
70 //valid paramters for this command
71 string Array[] = {"random","groups","processors","iters","outputdir","inputdir"};
72 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
74 OptionParser parser(option);
75 map<string, string> parameters = parser.getParameters();
77 ValidParameters validParameter;
79 //check to make sure all parameters are valid for command
80 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
81 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
84 //initialize outputTypes
85 vector<string> tempOutNames;
86 outputTypes["parsimony"] = tempOutNames;
87 outputTypes["psummary"] = tempOutNames;
89 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
91 //are you trying to use parsimony without reading a tree or saying you want random distribution
92 if (randomtree == "") {
93 if (globaldata->gTree.size() == 0) {
94 m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true; }
97 //if the user changes the output directory command factory will send this info to us in the output parameter
98 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(globaldata->inputFileName); } }
100 //check for optional parameter and set defaults
101 // ...at some point should added some additional type checking...
102 groups = validParameter.validFile(parameters, "groups", false);
103 if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
105 m->splitAtDash(groups, Groups);
106 globaldata->Groups = Groups;
109 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
110 convert(itersString, iters);
112 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
113 convert(temp, processors);
115 if (abort == false) {
116 //randomtree will tell us if user had their own treefile or if they just want the random distribution
117 //user has entered their own tree
118 if (randomtree == "") {
119 T = globaldata->gTree;
120 tmap = globaldata->gTreemap;
122 if(outputDir == "") { outputDir += m->hasPath(globaldata->getTreeFile()); }
123 output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".parsimony", itersString);
124 outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".parsimony");
125 outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".parsimony");
127 sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".psummary";
128 m->openOutputFile(sumFile, outSum);
129 outputNames.push_back(sumFile);
130 outputTypes["psummary"].push_back(sumFile);
131 }else { //user wants random distribution
132 savetmap = globaldata->gTreemap;
135 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
136 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
137 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
138 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
141 //set users groups to analyze
142 util = new SharedUtil();
143 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
144 util->getCombos(groupComb, globaldata->Groups, numComp);
146 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
148 pars = new Parsimony(tmap);
156 catch(exception& e) {
157 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
162 //**********************************************************************************************************************
164 void ParsimonyCommand::help(){
166 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
167 m->mothurOut("The parsimony command parameters are random, groups, processors and iters. No parameters are required.\n");
168 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n");
169 m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
170 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
171 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
172 m->mothurOut("Example parsimony(random=out, iters=500).\n");
173 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
174 m->mothurOut("and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
175 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
177 catch(exception& e) {
178 m->errorOut(e, "ParsimonyCommand", "help");
184 /***********************************************************/
185 int ParsimonyCommand::execute() {
188 if (abort == true) { if (calledHelp) { return 0; } return 2; }
191 reading = new Progress("Comparing to random:", iters);
193 if (m->control_pressed) {
194 delete reading; delete pars; delete util; delete output;
195 if (randomtree == "") { outSum.close(); }
196 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
197 globaldata->Groups.clear();
202 //get pscore for users tree
203 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
204 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
205 rscoreFreq.resize(numComp);
206 uscoreFreq.resize(numComp);
207 rCumul.resize(numComp);
208 uCumul.resize(numComp);
209 userTreeScores.resize(numComp);
210 UScoreSig.resize(numComp);
212 if (randomtree == "") {
213 //get pscores for users trees
214 for (int i = 0; i < T.size(); i++) {
215 userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
217 if (m->control_pressed) {
218 delete reading; delete pars; delete util; delete output;
219 if (randomtree == "") { outSum.close(); }
220 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
221 globaldata->Groups.clear();
226 //output scores for each combination
227 for(int k = 0; k < numComp; k++) {
230 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
231 if (it == uscoreFreq[k].end()) {//new score
232 uscoreFreq[k][userData[k]] = 1;
233 }else{ uscoreFreq[k][userData[k]]++; }
235 //add users score to valid scores
236 validScores[userData[k]] = userData[k];
238 //save score for summary file
239 userTreeScores[k].push_back(userData[k]);
243 //get pscores for random trees
244 for (int j = 0; j < iters; j++) {
246 //create new tree with same num nodes and leaves as users
249 //create random relationships between nodes
250 randT->assembleRandomTree();
252 //get pscore of random tree
253 randomData = pars->getValues(randT, processors, outputDir);
255 if (m->control_pressed) {
256 delete reading; delete pars; delete util; delete output; delete randT;
257 if (randomtree == "") { outSum.close(); }
258 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
259 globaldata->Groups.clear();
263 for(int r = 0; r < numComp; r++) {
264 //add trees pscore to map of scores
265 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
266 if (it != rscoreFreq[r].end()) {//already have that score
267 rscoreFreq[r][randomData[r]]++;
268 }else{//first time we have seen this score
269 rscoreFreq[r][randomData[r]] = 1;
272 //add randoms score to validscores
273 validScores[randomData[r]] = randomData[r];
276 //update progress bar
283 //get pscores for random trees
284 for (int j = 0; j < iters; j++) {
286 //create new tree with same num nodes and leaves as users
288 //create random relationships between nodes
290 randT->assembleRandomTree();
292 if (m->control_pressed) {
293 delete reading; delete pars; delete util; delete output; delete randT;
294 globaldata->gTreemap = savetmap;
295 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
296 globaldata->Groups.clear();
301 //get pscore of random tree
302 randomData = pars->getValues(randT, processors, outputDir);
304 if (m->control_pressed) {
305 delete reading; delete pars; delete util; delete output; delete randT;
306 globaldata->gTreemap = savetmap;
307 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
308 globaldata->Groups.clear();
312 for(int r = 0; r < numComp; r++) {
313 //add trees pscore to map of scores
314 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
315 if (it != rscoreFreq[r].end()) {//already have that score
316 rscoreFreq[r][randomData[r]]++;
317 }else{//first time we have seen this score
318 rscoreFreq[r][randomData[r]] = 1;
321 //add randoms score to validscores
322 validScores[randomData[r]] = randomData[r];
325 //update progress bar
332 for(int a = 0; a < numComp; a++) {
333 float rcumul = 0.0000;
334 float ucumul = 0.0000;
335 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
336 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
337 if (randomtree == "") {
338 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
339 //user data has that score
340 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
341 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
343 uCumul[a][it->first] = ucumul;
346 //make rscoreFreq map and rCumul
347 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
348 //get percentage of random trees with that info
349 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
350 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
351 rCumul[a][it->first] = rcumul;
354 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
355 for (int h = 0; h < userTreeScores[a].size(); h++) {
356 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
360 if (m->control_pressed) {
361 delete reading; delete pars; delete util; delete output;
362 if (randomtree == "") { outSum.close(); }
363 else { globaldata->gTreemap = savetmap; }
364 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
365 globaldata->Groups.clear();
369 //finish progress bar
374 printParsimonyFile();
375 if (randomtree == "") { printUSummaryFile(); }
377 //reset globaldata's treemap if you just did random distrib
378 if (randomtree != "") {
379 //memory leak prevention
380 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
381 globaldata->gTreemap = savetmap;
384 //reset groups parameter
385 globaldata->Groups.clear();
387 if (m->control_pressed) {
388 delete pars; delete util; delete output;
389 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
393 m->mothurOutEndLine();
394 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
395 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
396 m->mothurOutEndLine();
402 catch(exception& e) {
403 m->errorOut(e, "ParsimonyCommand", "execute");
408 /***********************************************************/
409 void ParsimonyCommand::printParsimonyFile() {
414 if (randomtree == "") {
415 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
417 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
420 for(int a = 0; a < numComp; a++) {
421 output->initFile(groupComb[a], tags);
423 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
424 if (randomtree == "") {
425 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
427 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
429 output->output(data);
435 catch(exception& e) {
436 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
440 /***********************************************************/
441 int ParsimonyCommand::printUSummaryFile() {
444 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
445 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
448 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
452 for (int i = 0; i< T.size(); i++) {
453 for(int a = 0; a < numComp; a++) {
454 if (m->control_pressed) { outSum.close(); return 0; }
455 if (UScoreSig[a][i] > (1/(float)iters)) {
456 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
457 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
458 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
460 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
461 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
462 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
470 catch(exception& e) {
471 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
476 /***********************************************************/
477 void ParsimonyCommand::getUserInput() {
481 tmap = new TreeMap();
483 m->mothurOut("Please enter the number of groups you would like to analyze: ");
485 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
489 numEachGroup.resize(numGroups, 0);
491 for (int i = 1; i <= numGroups; i++) {
492 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
494 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
496 //set tmaps seqsPerGroup
497 tmap->seqsPerGroup[toString(i)] = num;
498 tmap->namesOfGroups.push_back(toString(i));
500 //set tmaps namesOfSeqs
501 for (int j = 0; j < num; j++) {
502 tmap->namesOfSeqs.push_back(toString(count));
503 tmap->treemap[toString(count)].groupname = toString(i);
508 //clears buffer so next command doesn't have error
512 //save tmap for later
513 //memory leak prevention
514 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
515 globaldata->gTreemap = tmap;
516 globaldata->Treenames = tmap->namesOfSeqs;
519 catch(exception& e) {
520 m->errorOut(e, "ParsimonyCommand", "getUserInput");
525 /***********************************************************/