5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> ParsimonyCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "ParsimonyCommand", "setParameters");
35 //**********************************************************************************************************************
36 string ParsimonyCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
40 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
41 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
42 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
43 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
44 helpString += "Example parsimony(random=out, iters=500).\n";
45 helpString += "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n";
46 helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n";
47 helpString += "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n";
51 m->errorOut(e, "ParsimonyCommand", "getHelpString");
56 //**********************************************************************************************************************
57 ParsimonyCommand::ParsimonyCommand(){
59 abort = true; calledHelp = true;
61 vector<string> tempOutNames;
62 outputTypes["parsimony"] = tempOutNames;
63 outputTypes["psummary"] = tempOutNames;
66 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
70 /***********************************************************/
71 ParsimonyCommand::ParsimonyCommand(string option) {
73 abort = false; calledHelp = false;
76 //allow user to run help
77 if(option == "help") { help(); abort = true; calledHelp = true; }
78 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81 vector<string> myArray = setParameters();
83 OptionParser parser(option);
84 map<string, string> parameters = parser.getParameters();
85 map<string,string>::iterator it;
87 ValidParameters validParameter;
89 //check to make sure all parameters are valid for command
90 for (it = parameters.begin(); it != parameters.end(); it++) {
91 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
94 //initialize outputTypes
95 vector<string> tempOutNames;
96 outputTypes["parsimony"] = tempOutNames;
97 outputTypes["psummary"] = tempOutNames;
99 //if the user changes the input directory command factory will send this info to us in the output parameter
100 string inputDir = validParameter.validFile(parameters, "inputdir", false);
101 if (inputDir == "not found"){ inputDir = ""; }
104 it = parameters.find("tree");
105 //user has given a template file
106 if(it != parameters.end()){
107 path = m->hasPath(it->second);
108 //if the user has not given a path then, add inputdir. else leave path alone.
109 if (path == "") { parameters["tree"] = inputDir + it->second; }
112 it = parameters.find("group");
113 //user has given a template file
114 if(it != parameters.end()){
115 path = m->hasPath(it->second);
116 //if the user has not given a path then, add inputdir. else leave path alone.
117 if (path == "") { parameters["group"] = inputDir + it->second; }
120 it = parameters.find("name");
121 //user has given a template file
122 if(it != parameters.end()){
123 path = m->hasPath(it->second);
124 //if the user has not given a path then, add inputdir. else leave path alone.
125 if (path == "") { parameters["name"] = inputDir + it->second; }
129 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
131 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
133 //are you trying to use parsimony without reading a tree or saying you want random distribution
134 if (randomtree == "") {
135 //check for required parameters
136 treefile = validParameter.validFile(parameters, "tree", true);
137 if (treefile == "not open") { treefile = ""; abort = true; }
138 else if (treefile == "not found") { //if there is a current design file, use it
139 treefile = m->getTreeFile();
140 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
141 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
142 }else { m->setTreeFile(treefile); }
144 //check for required parameters
145 groupfile = validParameter.validFile(parameters, "group", true);
146 if (groupfile == "not open") { abort = true; }
147 else if (groupfile == "not found") { groupfile = ""; }
148 else { m->setGroupFile(groupfile); }
150 namefile = validParameter.validFile(parameters, "name", true);
151 if (namefile == "not open") { namefile = ""; abort = true; }
152 else if (namefile == "not found") { namefile = ""; }
153 else { m->setNameFile(namefile); }
156 //if the user changes the output directory command factory will send this info to us in the output parameter
157 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(treefile); } }
159 //check for optional parameter and set defaults
160 // ...at some point should added some additional type checking...
161 groups = validParameter.validFile(parameters, "groups", false);
162 if (groups == "not found") { groups = ""; m->clearGroups(); }
164 m->splitAtDash(groups, Groups);
165 m->setGroups(Groups);
168 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
169 m->mothurConvert(itersString, iters);
171 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
172 m->setProcessors(temp);
173 m->mothurConvert(temp, processors);
175 if (namefile == "") {
176 vector<string> files; files.push_back(treefile);
177 parser.getNameFile(files);
183 catch(exception& e) {
184 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
188 /***********************************************************/
189 int ParsimonyCommand::execute() {
192 if (abort == true) { if (calledHelp) { return 0; } return 2; }
195 //randomtree will tell us if user had their own treefile or if they just want the random distribution
196 //user has entered their own tree
197 if (randomtree == "") {
199 m->setTreeFile(treefile);
201 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
202 T = reader->getTrees();
203 tmap = T[0]->getTreeMap();
206 if(outputDir == "") { outputDir += m->hasPath(treefile); }
207 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString);
208 outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
209 outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
211 sumFile = outputDir + m->getSimpleName(treefile) + ".psummary";
212 m->openOutputFile(sumFile, outSum);
213 outputNames.push_back(sumFile);
214 outputTypes["psummary"].push_back(sumFile);
215 }else { //user wants random distribution
218 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
219 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
220 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
221 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
224 //set users groups to analyze
226 vector<string> mGroups = m->getGroups();
227 vector<string> tGroups = tmap->getNamesOfGroups();
228 util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
229 util.getCombos(groupComb, mGroups, numComp);
230 m->setGroups(mGroups);
232 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
238 reading = new Progress("Comparing to random:", iters);
240 if (m->control_pressed) {
241 delete reading; delete output;
242 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
243 if (randomtree == "") { outSum.close(); }
244 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
250 //get pscore for users tree
251 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
252 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
253 rscoreFreq.resize(numComp);
254 uscoreFreq.resize(numComp);
255 rCumul.resize(numComp);
256 uCumul.resize(numComp);
257 userTreeScores.resize(numComp);
258 UScoreSig.resize(numComp);
260 if (randomtree == "") {
261 //get pscores for users trees
262 for (int i = 0; i < T.size(); i++) {
263 userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
265 if (m->control_pressed) {
266 delete reading; delete output;
267 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
268 if (randomtree == "") { outSum.close(); }
269 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
275 //output scores for each combination
276 for(int k = 0; k < numComp; k++) {
279 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
280 if (it == uscoreFreq[k].end()) {//new score
281 uscoreFreq[k][userData[k]] = 1;
282 }else{ uscoreFreq[k][userData[k]]++; }
284 //add users score to valid scores
285 validScores[userData[k]] = userData[k];
287 //save score for summary file
288 userTreeScores[k].push_back(userData[k]);
292 //get pscores for random trees
293 for (int j = 0; j < iters; j++) {
295 //create new tree with same num nodes and leaves as users
296 randT = new Tree(tmap);
298 //create random relationships between nodes
299 randT->assembleRandomTree();
301 //get pscore of random tree
302 randomData = pars.getValues(randT, processors, outputDir);
304 if (m->control_pressed) {
305 delete reading; delete output; delete randT;
306 if (randomtree == "") { outSum.close(); }
307 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
308 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
313 for(int r = 0; r < numComp; r++) {
314 //add trees pscore to map of scores
315 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
316 if (it != rscoreFreq[r].end()) {//already have that score
317 rscoreFreq[r][randomData[r]]++;
318 }else{//first time we have seen this score
319 rscoreFreq[r][randomData[r]] = 1;
322 //add randoms score to validscores
323 validScores[randomData[r]] = randomData[r];
326 //update progress bar
333 //get pscores for random trees
334 for (int j = 0; j < iters; j++) {
336 //create new tree with same num nodes and leaves as users
337 randT = new Tree(tmap);
338 //create random relationships between nodes
340 randT->assembleRandomTree();
342 if (m->control_pressed) {
343 delete reading; delete output; delete randT; delete tmap;
344 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
348 //get pscore of random tree
349 randomData = pars.getValues(randT, processors, outputDir);
351 if (m->control_pressed) {
352 delete reading; delete output; delete randT; delete tmap;
353 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
356 for(int r = 0; r < numComp; r++) {
357 //add trees pscore to map of scores
358 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
359 if (it != rscoreFreq[r].end()) {//already have that score
360 rscoreFreq[r][randomData[r]]++;
361 }else{//first time we have seen this score
362 rscoreFreq[r][randomData[r]] = 1;
365 //add randoms score to validscores
366 validScores[randomData[r]] = randomData[r];
369 //update progress bar
376 for(int a = 0; a < numComp; a++) {
377 float rcumul = 0.0000;
378 float ucumul = 0.0000;
379 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
380 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
381 if (randomtree == "") {
382 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
383 //user data has that score
384 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
385 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
387 uCumul[a][it->first] = ucumul;
390 //make rscoreFreq map and rCumul
391 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
392 //get percentage of random trees with that info
393 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
394 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
395 rCumul[a][it->first] = rcumul;
398 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
399 for (int h = 0; h < userTreeScores[a].size(); h++) {
400 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
404 if (m->control_pressed) {
405 delete reading; delete output;
406 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
407 if (randomtree == "") { outSum.close(); }
408 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
412 //finish progress bar
416 printParsimonyFile();
417 if (randomtree == "") { printUSummaryFile(); }
419 delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
421 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
423 m->mothurOutEndLine();
424 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
425 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
426 m->mothurOutEndLine();
432 catch(exception& e) {
433 m->errorOut(e, "ParsimonyCommand", "execute");
438 /***********************************************************/
439 void ParsimonyCommand::printParsimonyFile() {
444 if (randomtree == "") {
445 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
447 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
450 for(int a = 0; a < numComp; a++) {
451 output->initFile(groupComb[a], tags);
453 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
454 if (randomtree == "") {
455 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
457 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
459 output->output(data);
465 catch(exception& e) {
466 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
470 /***********************************************************/
471 int ParsimonyCommand::printUSummaryFile() {
474 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
475 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
478 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
482 for (int i = 0; i< T.size(); i++) {
483 for(int a = 0; a < numComp; a++) {
484 if (m->control_pressed) { outSum.close(); return 0; }
485 if (UScoreSig[a][i] > (1/(float)iters)) {
486 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
487 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
488 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
490 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
491 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
492 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
500 catch(exception& e) {
501 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
506 /***********************************************************/
507 void ParsimonyCommand::getUserInput() {
511 tmap = new TreeMap();
513 m->mothurOut("Please enter the number of groups you would like to analyze: ");
515 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
519 numEachGroup.resize(numGroups, 0);
522 for (int i = 1; i <= numGroups; i++) {
523 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
525 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
527 //set tmaps seqsPerGroup
528 tmap->seqsPerGroup[toString(i)] = num;
529 tmap->addGroup(toString(i));
531 //set tmaps namesOfSeqs
532 for (int j = 0; j < num; j++) {
533 tmap->namesOfSeqs.push_back(toString(count));
534 tmap->treemap[toString(count)].groupname = toString(i);
539 //clears buffer so next command doesn't have error
543 m->Treenames = tmap->namesOfSeqs;
546 catch(exception& e) {
547 m->errorOut(e, "ParsimonyCommand", "getUserInput");
551 /***********************************************************/