5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
11 #include "treereader.h"
13 //**********************************************************************************************************************
14 vector<string> ParsimonyCommand::setParameters(){
16 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
20 CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "ParsimonyCommand", "setParameters");
35 //**********************************************************************************************************************
36 string ParsimonyCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
40 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
41 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
42 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
43 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
44 helpString += "Example parsimony(random=out, iters=500).\n";
45 helpString += "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n";
46 helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n";
47 helpString += "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n";
51 m->errorOut(e, "ParsimonyCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string ParsimonyCommand::getOutputFileNameTag(string type, string inputName=""){
58 string outputFileName = "";
59 map<string, vector<string> >::iterator it;
61 //is this a type this command creates
62 it = outputTypes.find(type);
63 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65 if (type == "parsimony") { outputFileName = "parsimony"; }
66 else if (type == "psummary") { outputFileName = "psummary"; }
67 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
69 return outputFileName;
72 m->errorOut(e, "ParsimonyCommand", "getOutputFileNameTag");
77 //**********************************************************************************************************************
78 ParsimonyCommand::ParsimonyCommand(){
80 abort = true; calledHelp = true;
82 vector<string> tempOutNames;
83 outputTypes["parsimony"] = tempOutNames;
84 outputTypes["psummary"] = tempOutNames;
87 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
91 /***********************************************************/
92 ParsimonyCommand::ParsimonyCommand(string option) {
94 abort = false; calledHelp = false;
97 //allow user to run help
98 if(option == "help") { help(); abort = true; calledHelp = true; }
99 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
102 vector<string> myArray = setParameters();
104 OptionParser parser(option);
105 map<string, string> parameters = parser.getParameters();
106 map<string,string>::iterator it;
108 ValidParameters validParameter;
110 //check to make sure all parameters are valid for command
111 for (it = parameters.begin(); it != parameters.end(); it++) {
112 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
115 //initialize outputTypes
116 vector<string> tempOutNames;
117 outputTypes["parsimony"] = tempOutNames;
118 outputTypes["psummary"] = tempOutNames;
120 //if the user changes the input directory command factory will send this info to us in the output parameter
121 string inputDir = validParameter.validFile(parameters, "inputdir", false);
122 if (inputDir == "not found"){ inputDir = ""; }
125 it = parameters.find("tree");
126 //user has given a template file
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 //if the user has not given a path then, add inputdir. else leave path alone.
130 if (path == "") { parameters["tree"] = inputDir + it->second; }
133 it = parameters.find("group");
134 //user has given a template file
135 if(it != parameters.end()){
136 path = m->hasPath(it->second);
137 //if the user has not given a path then, add inputdir. else leave path alone.
138 if (path == "") { parameters["group"] = inputDir + it->second; }
141 it = parameters.find("name");
142 //user has given a template file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["name"] = inputDir + it->second; }
150 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
152 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
154 //are you trying to use parsimony without reading a tree or saying you want random distribution
155 if (randomtree == "") {
156 //check for required parameters
157 treefile = validParameter.validFile(parameters, "tree", true);
158 if (treefile == "not open") { treefile = ""; abort = true; }
159 else if (treefile == "not found") { //if there is a current design file, use it
160 treefile = m->getTreeFile();
161 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
162 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
163 }else { m->setTreeFile(treefile); }
165 //check for required parameters
166 groupfile = validParameter.validFile(parameters, "group", true);
167 if (groupfile == "not open") { abort = true; }
168 else if (groupfile == "not found") { groupfile = ""; }
169 else { m->setGroupFile(groupfile); }
171 namefile = validParameter.validFile(parameters, "name", true);
172 if (namefile == "not open") { namefile = ""; abort = true; }
173 else if (namefile == "not found") { namefile = ""; }
174 else { m->setNameFile(namefile); }
177 //if the user changes the output directory command factory will send this info to us in the output parameter
178 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(treefile); } }
180 //check for optional parameter and set defaults
181 // ...at some point should added some additional type checking...
182 groups = validParameter.validFile(parameters, "groups", false);
183 if (groups == "not found") { groups = ""; m->clearGroups(); }
185 m->splitAtDash(groups, Groups);
186 m->setGroups(Groups);
189 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
190 m->mothurConvert(itersString, iters);
192 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
193 m->setProcessors(temp);
194 m->mothurConvert(temp, processors);
196 if (namefile == "") {
197 vector<string> files; files.push_back(treefile);
198 parser.getNameFile(files);
204 catch(exception& e) {
205 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
209 /***********************************************************/
210 int ParsimonyCommand::execute() {
213 if (abort == true) { if (calledHelp) { return 0; } return 2; }
216 //randomtree will tell us if user had their own treefile or if they just want the random distribution
217 //user has entered their own tree
218 if (randomtree == "") {
220 m->setTreeFile(treefile);
222 TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
223 T = reader->getTrees();
224 tmap = T[0]->getTreeMap();
227 if(outputDir == "") { outputDir += m->hasPath(treefile); }
228 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"), itersString);
229 outputNames.push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
230 outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("parsimony"));
232 sumFile = outputDir + m->getSimpleName(treefile) + "." + getOutputFileNameTag("psummary");
233 m->openOutputFile(sumFile, outSum);
234 outputNames.push_back(sumFile);
235 outputTypes["psummary"].push_back(sumFile);
236 }else { //user wants random distribution
239 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
240 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
241 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
242 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
245 //set users groups to analyze
247 vector<string> mGroups = m->getGroups();
248 vector<string> tGroups = tmap->getNamesOfGroups();
249 util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
250 util.getCombos(groupComb, mGroups, numComp);
251 m->setGroups(mGroups);
253 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
259 reading = new Progress("Comparing to random:", iters);
261 if (m->control_pressed) {
262 delete reading; delete output;
263 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
264 if (randomtree == "") { outSum.close(); }
265 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
271 //get pscore for users tree
272 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
273 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
274 rscoreFreq.resize(numComp);
275 uscoreFreq.resize(numComp);
276 rCumul.resize(numComp);
277 uCumul.resize(numComp);
278 userTreeScores.resize(numComp);
279 UScoreSig.resize(numComp);
281 if (randomtree == "") {
282 //get pscores for users trees
283 for (int i = 0; i < T.size(); i++) {
284 userData = pars.getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
286 if (m->control_pressed) {
287 delete reading; delete output;
288 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
289 if (randomtree == "") { outSum.close(); }
290 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
296 //output scores for each combination
297 for(int k = 0; k < numComp; k++) {
300 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
301 if (it == uscoreFreq[k].end()) {//new score
302 uscoreFreq[k][userData[k]] = 1;
303 }else{ uscoreFreq[k][userData[k]]++; }
305 //add users score to valid scores
306 validScores[userData[k]] = userData[k];
308 //save score for summary file
309 userTreeScores[k].push_back(userData[k]);
313 //get pscores for random trees
314 for (int j = 0; j < iters; j++) {
316 //create new tree with same num nodes and leaves as users
317 randT = new Tree(tmap);
319 //create random relationships between nodes
320 randT->assembleRandomTree();
322 //get pscore of random tree
323 randomData = pars.getValues(randT, processors, outputDir);
325 if (m->control_pressed) {
326 delete reading; delete output; delete randT;
327 if (randomtree == "") { outSum.close(); }
328 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
329 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
334 for(int r = 0; r < numComp; r++) {
335 //add trees pscore to map of scores
336 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
337 if (it != rscoreFreq[r].end()) {//already have that score
338 rscoreFreq[r][randomData[r]]++;
339 }else{//first time we have seen this score
340 rscoreFreq[r][randomData[r]] = 1;
343 //add randoms score to validscores
344 validScores[randomData[r]] = randomData[r];
347 //update progress bar
354 //get pscores for random trees
355 for (int j = 0; j < iters; j++) {
357 //create new tree with same num nodes and leaves as users
358 randT = new Tree(tmap);
359 //create random relationships between nodes
361 randT->assembleRandomTree();
363 if (m->control_pressed) {
364 delete reading; delete output; delete randT; delete tmap;
365 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
369 //get pscore of random tree
370 randomData = pars.getValues(randT, processors, outputDir);
372 if (m->control_pressed) {
373 delete reading; delete output; delete randT; delete tmap;
374 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
377 for(int r = 0; r < numComp; r++) {
378 //add trees pscore to map of scores
379 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
380 if (it != rscoreFreq[r].end()) {//already have that score
381 rscoreFreq[r][randomData[r]]++;
382 }else{//first time we have seen this score
383 rscoreFreq[r][randomData[r]] = 1;
386 //add randoms score to validscores
387 validScores[randomData[r]] = randomData[r];
390 //update progress bar
397 for(int a = 0; a < numComp; a++) {
398 float rcumul = 0.0000;
399 float ucumul = 0.0000;
400 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
401 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
402 if (randomtree == "") {
403 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
404 //user data has that score
405 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
406 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
408 uCumul[a][it->first] = ucumul;
411 //make rscoreFreq map and rCumul
412 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
413 //get percentage of random trees with that info
414 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
415 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
416 rCumul[a][it->first] = rcumul;
419 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
420 for (int h = 0; h < userTreeScores[a].size(); h++) {
421 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
425 if (m->control_pressed) {
426 delete reading; delete output;
427 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
428 if (randomtree == "") { outSum.close(); }
429 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
433 //finish progress bar
437 printParsimonyFile();
438 if (randomtree == "") { printUSummaryFile(); }
440 delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
442 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
444 m->mothurOutEndLine();
445 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
446 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
447 m->mothurOutEndLine();
453 catch(exception& e) {
454 m->errorOut(e, "ParsimonyCommand", "execute");
459 /***********************************************************/
460 void ParsimonyCommand::printParsimonyFile() {
465 if (randomtree == "") {
466 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
468 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
471 for(int a = 0; a < numComp; a++) {
472 output->initFile(groupComb[a], tags);
474 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
475 if (randomtree == "") {
476 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
478 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
480 output->output(data);
486 catch(exception& e) {
487 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
491 /***********************************************************/
492 int ParsimonyCommand::printUSummaryFile() {
495 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
496 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
499 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
503 for (int i = 0; i< T.size(); i++) {
504 for(int a = 0; a < numComp; a++) {
505 if (m->control_pressed) { outSum.close(); return 0; }
506 if (UScoreSig[a][i] > (1/(float)iters)) {
507 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
508 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
509 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
511 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
512 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
513 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
521 catch(exception& e) {
522 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
527 /***********************************************************/
528 void ParsimonyCommand::getUserInput() {
532 tmap = new TreeMap();
534 m->mothurOut("Please enter the number of groups you would like to analyze: ");
536 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
540 numEachGroup.resize(numGroups, 0);
543 for (int i = 1; i <= numGroups; i++) {
544 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
546 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
548 //set tmaps seqsPerGroup
549 tmap->seqsPerGroup[toString(i)] = num;
550 tmap->addGroup(toString(i));
552 //set tmaps namesOfSeqs
553 for (int j = 0; j < num; j++) {
554 tmap->namesOfSeqs.push_back(toString(count));
555 tmap->treemap[toString(count)].groupname = toString(i);
560 //clears buffer so next command doesn't have error
564 m->Treenames = tmap->namesOfSeqs;
567 catch(exception& e) {
568 m->errorOut(e, "ParsimonyCommand", "getUserInput");
572 /***********************************************************/