5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> ParsimonyCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
19 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21 CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "ParsimonyCommand", "setParameters");
36 //**********************************************************************************************************************
37 string ParsimonyCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
41 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
42 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
43 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
44 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
45 helpString += "Example parsimony(random=out, iters=500).\n";
46 helpString += "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n";
47 helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n";
48 helpString += "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n";
52 m->errorOut(e, "ParsimonyCommand", "getHelpString");
56 //**********************************************************************************************************************
57 string ParsimonyCommand::getOutputFileNameTag(string type, string inputName=""){
59 string outputFileName = "";
60 map<string, vector<string> >::iterator it;
62 //is this a type this command creates
63 it = outputTypes.find(type);
64 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66 if (type == "parsimony") { outputFileName = "parsimony"; }
67 else if (type == "psummary") { outputFileName = "psummary"; }
68 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
70 return outputFileName;
73 m->errorOut(e, "ParsimonyCommand", "getOutputFileNameTag");
78 //**********************************************************************************************************************
79 ParsimonyCommand::ParsimonyCommand(){
81 abort = true; calledHelp = true;
83 vector<string> tempOutNames;
84 outputTypes["parsimony"] = tempOutNames;
85 outputTypes["psummary"] = tempOutNames;
88 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
92 /***********************************************************/
93 ParsimonyCommand::ParsimonyCommand(string option) {
95 abort = false; calledHelp = false;
98 //allow user to run help
99 if(option == "help") { help(); abort = true; calledHelp = true; }
100 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103 vector<string> myArray = setParameters();
105 OptionParser parser(option);
106 map<string, string> parameters = parser.getParameters();
107 map<string,string>::iterator it;
109 ValidParameters validParameter;
111 //check to make sure all parameters are valid for command
112 for (it = parameters.begin(); it != parameters.end(); it++) {
113 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
116 //initialize outputTypes
117 vector<string> tempOutNames;
118 outputTypes["parsimony"] = tempOutNames;
119 outputTypes["psummary"] = tempOutNames;
121 //if the user changes the input directory command factory will send this info to us in the output parameter
122 string inputDir = validParameter.validFile(parameters, "inputdir", false);
123 if (inputDir == "not found"){ inputDir = ""; }
126 it = parameters.find("tree");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["tree"] = inputDir + it->second; }
134 it = parameters.find("group");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["group"] = inputDir + it->second; }
142 it = parameters.find("name");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
150 it = parameters.find("count");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["count"] = inputDir + it->second; }
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
161 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
163 //are you trying to use parsimony without reading a tree or saying you want random distribution
164 if (randomtree == "") {
165 //check for required parameters
166 treefile = validParameter.validFile(parameters, "tree", true);
167 if (treefile == "not open") { treefile = ""; abort = true; }
168 else if (treefile == "not found") { //if there is a current design file, use it
169 treefile = m->getTreeFile();
170 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
171 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
172 }else { m->setTreeFile(treefile); }
174 //check for required parameters
175 groupfile = validParameter.validFile(parameters, "group", true);
176 if (groupfile == "not open") { abort = true; }
177 else if (groupfile == "not found") { groupfile = ""; }
178 else { m->setGroupFile(groupfile); }
180 namefile = validParameter.validFile(parameters, "name", true);
181 if (namefile == "not open") { namefile = ""; abort = true; }
182 else if (namefile == "not found") { namefile = ""; }
183 else { m->setNameFile(namefile); }
185 countfile = validParameter.validFile(parameters, "count", true);
186 if (countfile == "not open") { countfile = ""; abort = true; }
187 else if (countfile == "not found") { countfile = ""; }
188 else { m->setCountTableFile(countfile); }
190 if ((namefile != "") && (countfile != "")) {
191 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
194 if ((groupfile != "") && (countfile != "")) {
195 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
200 //if the user changes the output directory command factory will send this info to us in the output parameter
201 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(treefile); } }
203 //check for optional parameter and set defaults
204 // ...at some point should added some additional type checking...
205 groups = validParameter.validFile(parameters, "groups", false);
206 if (groups == "not found") { groups = ""; m->clearGroups(); }
208 m->splitAtDash(groups, Groups);
209 m->setGroups(Groups);
212 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
213 m->mothurConvert(itersString, iters);
215 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
216 m->setProcessors(temp);
217 m->mothurConvert(temp, processors);
220 if (namefile == "") {
221 vector<string> files; files.push_back(treefile);
222 parser.getNameFile(files);
229 catch(exception& e) {
230 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
234 /***********************************************************/
235 int ParsimonyCommand::execute() {
238 if (abort == true) { if (calledHelp) { return 0; } return 2; }
241 //randomtree will tell us if user had their own treefile or if they just want the random distribution
242 //user has entered their own tree
243 if (randomtree == "") {
245 m->setTreeFile(treefile);
248 if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
249 else { reader = new TreeReader(treefile, countfile); }
250 T = reader->getTrees();
251 ct = T[0]->getCountTable();
254 if(outputDir == "") { outputDir += m->hasPath(treefile); }
255 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"), itersString);
256 outputNames.push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
257 outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
259 sumFile = outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("psummary");
260 m->openOutputFile(sumFile, outSum);
261 outputNames.push_back(sumFile);
262 outputTypes["psummary"].push_back(sumFile);
263 }else { //user wants random distribution
266 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
267 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
268 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
269 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
272 //set users groups to analyze
274 vector<string> mGroups = m->getGroups();
275 vector<string> tGroups = ct->getNamesOfGroups();
276 util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
277 util.getCombos(groupComb, mGroups, numComp);
278 m->setGroups(mGroups);
280 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
286 reading = new Progress("Comparing to random:", iters);
288 if (m->control_pressed) {
289 delete reading; delete output;
290 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
291 if (randomtree == "") { outSum.close(); }
292 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
298 //get pscore for users tree
299 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
300 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
301 rscoreFreq.resize(numComp);
302 uscoreFreq.resize(numComp);
303 rCumul.resize(numComp);
304 uCumul.resize(numComp);
305 userTreeScores.resize(numComp);
306 UScoreSig.resize(numComp);
308 if (randomtree == "") {
309 //get pscores for users trees
310 for (int i = 0; i < T.size(); i++) {
311 userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
313 if (m->control_pressed) {
314 delete reading; delete output;
315 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
316 if (randomtree == "") { outSum.close(); }
317 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
323 //output scores for each combination
324 for(int k = 0; k < numComp; k++) {
327 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
328 if (it == uscoreFreq[k].end()) {//new score
329 uscoreFreq[k][userData[k]] = 1;
330 }else{ uscoreFreq[k][userData[k]]++; }
332 //add users score to valid scores
333 validScores[userData[k]] = userData[k];
335 //save score for summary file
336 userTreeScores[k].push_back(userData[k]);
340 //get pscores for random trees
341 for (int j = 0; j < iters; j++) {
343 //create new tree with same num nodes and leaves as users
344 randT = new Tree(ct);
346 //create random relationships between nodes
347 randT->assembleRandomTree();
349 //get pscore of random tree
350 randomData = pars.getValues(randT, processors, outputDir);
352 if (m->control_pressed) {
353 delete reading; delete output; delete randT;
354 if (randomtree == "") { outSum.close(); }
355 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
356 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
361 for(int r = 0; r < numComp; r++) {
362 //add trees pscore to map of scores
363 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
364 if (it != rscoreFreq[r].end()) {//already have that score
365 rscoreFreq[r][randomData[r]]++;
366 }else{//first time we have seen this score
367 rscoreFreq[r][randomData[r]] = 1;
370 //add randoms score to validscores
371 validScores[randomData[r]] = randomData[r];
374 //update progress bar
381 //get pscores for random trees
382 for (int j = 0; j < iters; j++) {
384 //create new tree with same num nodes and leaves as users
385 randT = new Tree(ct);
386 //create random relationships between nodes
388 randT->assembleRandomTree();
390 if (m->control_pressed) {
391 delete reading; delete output; delete randT; delete ct;
392 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
396 //get pscore of random tree
397 randomData = pars.getValues(randT, processors, outputDir);
399 if (m->control_pressed) {
400 delete reading; delete output; delete randT; delete ct;
401 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
404 for(int r = 0; r < numComp; r++) {
405 //add trees pscore to map of scores
406 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
407 if (it != rscoreFreq[r].end()) {//already have that score
408 rscoreFreq[r][randomData[r]]++;
409 }else{//first time we have seen this score
410 rscoreFreq[r][randomData[r]] = 1;
413 //add randoms score to validscores
414 validScores[randomData[r]] = randomData[r];
417 //update progress bar
424 for(int a = 0; a < numComp; a++) {
425 float rcumul = 0.0000;
426 float ucumul = 0.0000;
427 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
428 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
429 if (randomtree == "") {
430 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
431 //user data has that score
432 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
433 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
435 uCumul[a][it->first] = ucumul;
438 //make rscoreFreq map and rCumul
439 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
440 //get percentage of random trees with that info
441 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
442 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
443 rCumul[a][it->first] = rcumul;
446 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
447 for (int h = 0; h < userTreeScores[a].size(); h++) {
448 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
452 if (m->control_pressed) {
453 delete reading; delete output;
454 delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
455 if (randomtree == "") { outSum.close(); }
456 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
460 //finish progress bar
464 printParsimonyFile();
465 if (randomtree == "") { printUSummaryFile(); }
467 delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
469 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
471 m->mothurOutEndLine();
472 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
473 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
474 m->mothurOutEndLine();
480 catch(exception& e) {
481 m->errorOut(e, "ParsimonyCommand", "execute");
486 /***********************************************************/
487 void ParsimonyCommand::printParsimonyFile() {
492 if (randomtree == "") {
493 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
495 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
498 for(int a = 0; a < numComp; a++) {
499 output->initFile(groupComb[a], tags);
501 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
502 if (randomtree == "") {
503 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
505 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
507 output->output(data);
513 catch(exception& e) {
514 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
518 /***********************************************************/
519 int ParsimonyCommand::printUSummaryFile() {
522 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
523 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
526 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
530 for (int i = 0; i< T.size(); i++) {
531 for(int a = 0; a < numComp; a++) {
532 if (m->control_pressed) { outSum.close(); return 0; }
533 if (UScoreSig[a][i] > (1/(float)iters)) {
534 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
535 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
536 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
538 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
539 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
540 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
548 catch(exception& e) {
549 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
554 /***********************************************************/
555 void ParsimonyCommand::getUserInput() {
559 ct = new CountTable();
561 m->mothurOut("Please enter the number of groups you would like to analyze: ");
563 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
567 numEachGroup.resize(numGroups, 0);
570 map<string, string> groupMap;
573 for (int i = 1; i <= numGroups; i++) {
574 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
576 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
578 gps.insert(toString(i));
580 //set tmaps namesOfSeqs
581 for (int j = 0; j < num; j++) {
582 groupMap[toString(count)] = i;
583 nameMap.insert(toString(count));
587 ct->createTable(nameMap, groupMap, gps);
589 //clears buffer so next command doesn't have error
593 m->Treenames = ct->getNamesOfSeqs();
595 catch(exception& e) {
596 m->errorOut(e, "ParsimonyCommand", "getUserInput");
600 /***********************************************************/