5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
// Registers every option accepted by the parsimony command in `parameters`
// and collects the option names into `myArray`.
14 vector<string> ParsimonyCommand::setParameters(){
// Input files: tree, name, count and group.
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none","parsimony-psummary",false,true,true); parameters.push_back(ptree);
17 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
19 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
// Analysis options. The fourth argument ("1000" for iters, "1" for processors)
// is presumably the default value; it matches the defaults quoted in the help text.
20 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
21 CommandParameter prandom("random", "String", "", "", "", "", "","",false,false); parameters.push_back(prandom);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
// Input/output directory overrides.
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Copy just the parameter names, in registration order.
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Reports `e` through m->errorOut, tagged with this class and method name.
32 m->errorOut(e, "ParsimonyCommand", "setParameters");
36 //**********************************************************************************************************************
// Builds the help text for the parsimony command: the accepted parameters,
// the expected call format, an example and the defaults.
37 string ParsimonyCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
44 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
45 helpString += "Example parsimony(random=out, iters=500).\n";
// The quotes around the empty default must be escaped: written as a bare "",
// the text becomes two adjacent literals that the compiler concatenates, and
// the quotes silently disappear from the message. The option is also named
// here by its real parameter name, random.
46 helpString += "The default value for random is \"\" (meaning you want to use the trees in your inputfile, random=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n";
47 helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n";
48 helpString += "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n";
// Reports `e` through m->errorOut, tagged with this class and method name.
52 m->errorOut(e, "ParsimonyCommand", "getHelpString");
56 //**********************************************************************************************************************
// Maps an output type to its file name pattern: "parsimony" and "psummary"
// are the only recognised types. Any other type is logged as an error and
// sets m->control_pressed.
57 string ParsimonyCommand::getOutputPattern(string type) {
61 if (type == "parsimony") { pattern = "[filename],parsimony"; }
62 else if (type == "psummary") { pattern = "[filename],psummary"; }
63 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Reports `e` through m->errorOut, tagged with this class and method name.
68 m->errorOut(e, "ParsimonyCommand", "getOutputPattern");
72 //**********************************************************************************************************************
// Default constructor: sets both abort and calledHelp, and registers the two
// output types ("parsimony", "psummary") with empty file lists.
73 ParsimonyCommand::ParsimonyCommand(){
75 abort = true; calledHelp = true;
77 vector<string> tempOutNames;
78 outputTypes["parsimony"] = tempOutNames;
79 outputTypes["psummary"] = tempOutNames;
// Reports `e` through m->errorOut, tagged with this class and method name.
82 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
86 /***********************************************************/
// Option-string constructor: parses and validates the user's options, resolves
// the input file names (tree, group, name, count), and stores the analysis
// settings (random, groups, iters, processors). Sets `abort` when an option is
// invalid or a required file is missing.
87 ParsimonyCommand::ParsimonyCommand(string option) {
89 abort = false; calledHelp = false;
92 //allow user to run help
93 if(option == "help") { help(); abort = true; calledHelp = true; }
94 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// List of parameter names this command accepts.
97 vector<string> myArray = setParameters();
// Split the option string into name -> value pairs.
99 OptionParser parser(option);
100 map<string, string> parameters = parser.getParameters();
101 map<string,string>::iterator it;
103 ValidParameters validParameter;
105 //check to make sure all parameters are valid for command
106 for (it = parameters.begin(); it != parameters.end(); it++) {
107 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
110 //initialize outputTypes
111 vector<string> tempOutNames;
112 outputTypes["parsimony"] = tempOutNames;
113 outputTypes["psummary"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
// For each input file option below: if the user supplied a bare file name
// (no path), prefix it with inputDir.
120 it = parameters.find("tree");
121 //user has given a tree file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["tree"] = inputDir + it->second; }
128 it = parameters.find("group");
129 //user has given a group file
130 if(it != parameters.end()){
131 path = m->hasPath(it->second);
132 //if the user has not given a path then, add inputdir. else leave path alone.
133 if (path == "") { parameters["group"] = inputDir + it->second; }
136 it = parameters.find("name");
137 //user has given a name file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["name"] = inputDir + it->second; }
144 it = parameters.find("count");
145 //user has given a count file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["count"] = inputDir + it->second; }
// Output directory and random option; "not found" is normalised to "".
153 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
155 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
157 //are you trying to use parsimony without reading a tree or saying you want random distribution
158 if (randomtree == "") {
159 //check for required parameters
160 treefile = validParameter.validFile(parameters, "tree", true);
161 if (treefile == "not open") { treefile = ""; abort = true; }
162 else if (treefile == "not found") { //if there is a current tree file, use it
163 treefile = m->getTreeFile();
164 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
165 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
166 }else { m->setTreeFile(treefile); }
168 //check for optional group, name and count files
// NOTE(review): unlike name and count, groupfile is left as "not open" on failure.
169 groupfile = validParameter.validFile(parameters, "group", true);
170 if (groupfile == "not open") { abort = true; }
171 else if (groupfile == "not found") { groupfile = ""; }
172 else { m->setGroupFile(groupfile); }
174 namefile = validParameter.validFile(parameters, "name", true);
175 if (namefile == "not open") { namefile = ""; abort = true; }
176 else if (namefile == "not found") { namefile = ""; }
177 else { m->setNameFile(namefile); }
179 countfile = validParameter.validFile(parameters, "count", true);
180 if (countfile == "not open") { countfile = ""; abort = true; }
181 else if (countfile == "not found") { countfile = ""; }
182 else { m->setCountTableFile(countfile); }
// A count file cannot be combined with a name file or a group file.
184 if ((namefile != "") && (countfile != "")) {
185 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
188 if ((groupfile != "") && (countfile != "")) {
189 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
194 //if the user changes the output directory command factory will send this info to us in the output parameter
// NOTE(review): this declares a new local `outputDir`; the earlier assignment
// (no type) targets a different variable, presumably the member. The tree-file
// path computed here looks like it never reaches that variable; execute()
// applies the same fallback itself. Confirm whether `string` is intended.
195 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(treefile); } }
197 //check for optional parameter and set defaults
198 // ...at some point should add some additional type checking...
199 groups = validParameter.validFile(parameters, "groups", false);
200 if (groups == "not found") { groups = ""; m->clearGroups(); }
// Dash-separated group names are split into Groups and stored via m->setGroups.
202 m->splitAtDash(groups, Groups);
203 m->setGroups(Groups);
// iters defaults to "1000"; the string form is kept in itersString.
206 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
207 m->mothurConvert(itersString, iters);
// processors falls back to the value returned by m->getProcessors().
209 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
210 m->setProcessors(temp);
211 m->mothurConvert(temp, processors);
// No name file given: hand the tree file to parser.getNameFile.
214 if (namefile == "") {
215 vector<string> files; files.push_back(treefile);
216 parser.getNameFile(files);
223 catch(exception& e) {
224 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
228 /***********************************************************/
// Runs the parsimony analysis. With user trees (randomtree == "") it scores
// each tree, scores `iters` random trees, and writes the .parsimony and
// .psummary outputs. With the random option it only scores random trees and
// writes their distribution. Returns 0 when help was requested, or 2 when the
// command was otherwise aborted during construction. Every m->control_pressed
// check frees what has been allocated and removes the files written so far.
229 int ParsimonyCommand::execute() {
232 if (abort == true) { if (calledHelp) { return 0; } return 2; }
235 //randomtree will tell us if user had their own treefile or if they just want the random distribution
236 //user has entered their own tree
237 if (randomtree == "") {
239 m->setTreeFile(treefile);
// Read the trees, using the count file if given, otherwise group/name files.
242 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
243 else { reader = new TreeReader(treefile, countfile); }
244 T = reader->getTrees();
245 ct = T[0]->getCountTable();
// Output names are built from the tree file; default directory is the tree file's path.
248 if(outputDir == "") { outputDir += m->hasPath(treefile); }
249 map<string, string> variables;
250 variables["[filename]"] = outputDir + m->getSimpleName(treefile) + ".";
252 output = new ColumnFile(getOutputFileName("parsimony",variables), itersString);
253 outputNames.push_back(getOutputFileName("parsimony",variables));
254 outputTypes["parsimony"].push_back(getOutputFileName("parsimony",variables));
256 sumFile = getOutputFileName("psummary",variables);
257 m->openOutputFile(sumFile, outSum);
258 outputNames.push_back(sumFile);
259 outputTypes["psummary"].push_back(sumFile);
260 }else { //user wants random distribution
// The value of the random option is used as the output file name.
263 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
264 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
265 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
266 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
269 //set users groups to analyze
271 vector<string> mGroups = m->getGroups();
272 vector<string> tGroups = ct->getNamesOfGroups();
273 util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
274 util.getCombos(groupComb, mGroups, numComp);
275 m->setGroups(mGroups);
// With a single group, add one extra comparison labelled with allGroups.
277 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
283 reading = new Progress("Comparing to random:", iters);
285 if (m->control_pressed) {
286 delete reading; delete output;
287 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
288 if (randomtree == "") { outSum.close(); }
289 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
295 //size every per-comparison container to numComp entries
296 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
297 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
298 rscoreFreq.resize(numComp);
299 uscoreFreq.resize(numComp);
300 rCumul.resize(numComp);
301 uCumul.resize(numComp);
302 userTreeScores.resize(numComp);
303 UScoreSig.resize(numComp);
305 if (randomtree == "") {
306 //get pscores for users trees
307 for (int i = 0; i < T.size(); i++) {
308 userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
310 if (m->control_pressed) {
311 delete reading; delete output;
312 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
313 if (randomtree == "") { outSum.close(); }
314 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
320 //output scores for each combination
321 for(int k = 0; k < numComp; k++) {
// Count how many user trees produced this score for comparison k.
324 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
325 if (it == uscoreFreq[k].end()) {//new score
326 uscoreFreq[k][userData[k]] = 1;
327 }else{ uscoreFreq[k][userData[k]]++; }
329 //add users score to valid scores
330 validScores[userData[k]] = userData[k];
332 //save score for summary file
333 userTreeScores[k].push_back(userData[k]);
337 //get pscores for random trees
338 for (int j = 0; j < iters; j++) {
340 //create new tree with same num nodes and leaves as users
341 randT = new Tree(ct);
343 //create random relationships between nodes
344 randT->assembleRandomTree();
346 //get pscore of random tree
347 randomData = pars.getValues(randT, processors, outputDir);
349 if (m->control_pressed) {
350 delete reading; delete output; delete randT;
351 if (randomtree == "") { outSum.close(); }
352 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
353 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
358 for(int r = 0; r < numComp; r++) {
359 //add trees pscore to map of scores
360 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
361 if (it != rscoreFreq[r].end()) {//already have that score
362 rscoreFreq[r][randomData[r]]++;
363 }else{//first time we have seen this score
364 rscoreFreq[r][randomData[r]] = 1;
367 //add randoms score to validscores
368 validScores[randomData[r]] = randomData[r];
371 //update progress bar
// NOTE(review): the loop below repeats the random-tree scoring and is presumably
// the random-only branch (its interrupt cleanup deletes no user trees) - confirm.
378 //get pscores for random trees
379 for (int j = 0; j < iters; j++) {
381 //create new tree with same num nodes and leaves as users
382 randT = new Tree(ct);
383 //create random relationships between nodes
385 randT->assembleRandomTree();
387 if (m->control_pressed) {
388 delete reading; delete output; delete randT; delete ct;
389 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
393 //get pscore of random tree
394 randomData = pars.getValues(randT, processors, outputDir);
396 if (m->control_pressed) {
397 delete reading; delete output; delete randT; delete ct;
398 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
401 for(int r = 0; r < numComp; r++) {
402 //add trees pscore to map of scores
403 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
404 if (it != rscoreFreq[r].end()) {//already have that score
405 rscoreFreq[r][randomData[r]]++;
406 }else{//first time we have seen this score
407 rscoreFreq[r][randomData[r]] = 1;
410 //add randoms score to validscores
411 validScores[randomData[r]] = randomData[r];
414 //update progress bar
// Turn the raw counts into frequencies and running cumulative totals, per comparison.
421 for(int a = 0; a < numComp; a++) {
422 float rcumul = 0.0000;
423 float ucumul = 0.0000;
424 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
425 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
426 if (randomtree == "") {
427 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
428 //user data has that score; the count is divided by the number of user trees
429 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
430 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
432 uCumul[a][it->first] = ucumul;
435 //make rscoreFreq map and rCumul
436 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
437 //get percentage of random trees with that info
438 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
439 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
440 rCumul[a][it->first] = rcumul;
443 //find the significance of each user trees score when compared to the random trees and save for printing the summary file
444 for (int h = 0; h < userTreeScores[a].size(); h++) {
445 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
449 if (m->control_pressed) {
450 delete reading; delete output;
451 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
452 if (randomtree == "") { outSum.close(); }
453 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
457 //finish progress bar
// Write the score table; the per-tree summary is written only when user trees were given.
461 printParsimonyFile();
462 if (randomtree == "") { printUSummaryFile(); }
464 delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
466 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
// List the files that were produced.
468 m->mothurOutEndLine();
469 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
470 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
471 m->mothurOutEndLine();
477 catch(exception& e) {
478 m->errorOut(e, "ParsimonyCommand", "execute");
483 /***********************************************************/
// Writes the score distribution for every group combination through `output`.
// Columns are Score, RandFreq and RandCumul, plus UserFreq and UserCumul when
// user trees were analysed (randomtree == "").
484 void ParsimonyCommand::printParsimonyFile() {
// Column headers; the second, shorter set is presumably the else branch (random-only run).
489 if (randomtree == "") {
490 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
492 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
495 for(int a = 0; a < numComp; a++) {
// Start a section labelled with this group combination.
496 output->initFile(groupComb[a], tags);
// One row per score seen in any tree.
498 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
499 if (randomtree == "") {
500 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
502 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
504 output->output(data);
510 catch(exception& e) {
511 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
515 /***********************************************************/
// Writes the per-tree summary: for each user tree and group combination, the
// tree number, groups, parsimony score and significance. Each row goes to
// outSum, to stdout and to the log. Returns 0 early if m->control_pressed is set.
516 int ParsimonyCommand::printUSummaryFile() {
// Header row, written to the summary file and echoed through mothurOut.
519 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
520 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
// Fixed-point output with a visible decimal point.
523 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
527 for (int i = 0; i< T.size(); i++) {
528 for(int a = 0; a < numComp; a++) {
529 if (m->control_pressed) { outSum.close(); return 0; }
// Significance above 1/iters is printed as is, with itersString.length() digits of precision.
530 if (UScoreSig[a][i] > (1/(float)iters)) {
531 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
532 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
533 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
// Presumably the else branch: significance is reported as "<" followed by 1/iters.
// NOTE(review): the log line omits the "<" that the file and stdout lines print.
535 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
536 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
537 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
545 catch(exception& e) {
546 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
551 /***********************************************************/
// Builds a fresh CountTable from prompted input: asks for the number of groups
// and the number of sequences in each, names groups and sequences by number,
// and stores the resulting sequence names in m->Treenames.
552 void ParsimonyCommand::getUserInput() {
556 ct = new CountTable();
558 m->mothurOut("Please enter the number of groups you would like to analyze: ");
// Echo the entered value to the log only.
560 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
564 numEachGroup.resize(numGroups, 0);
// Maps each generated sequence name to its group name.
567 map<string, string> groupMap;
570 for (int i = 1; i <= numGroups; i++) {
571 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
573 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
// Groups are named "1" through numGroups.
575 gps.insert(toString(i));
577 //name each sequence after the running counter and assign it to group i
578 for (int j = 0; j < num; j++) {
579 groupMap[toString(count)] = toString(i);
580 nameMap.insert(toString(count));
584 ct->createTable(nameMap, groupMap, gps);
586 //clears buffer so next command doesn't have error
590 m->Treenames = ct->getNamesOfSeqs();
593 catch(exception& e) {
594 m->errorOut(e, "ParsimonyCommand", "getUserInput");
598 /***********************************************************/