5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
12 //**********************************************************************************************************************
// Registers every parameter the parsimony command accepts (tree, group, name, groups,
// random, iters, processors, inputdir, outputdir) in the parameters container and
// copies the registered names into myArray.
// NOTE(review): the listing numbers jump from 26 to 30, so the return statement and
// the catch header are not visible here; myArray is presumably what gets returned.
13 vector<string> ParsimonyCommand::setParameters(){
// The three file inputs pass InputTypes as their second argument.
15 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
16 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
17 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
// Free-form and numeric options; iters carries the literal 1000 and processors the literal 1.
18 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
19 CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect only the name field of each registered parameter.
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Failures are reported through m->errorOut, tagged with the class and method name.
30 m->errorOut(e, "ParsimonyCommand", "setParameters");
34 //**********************************************************************************************************************
// Assembles the help text for the parsimony command by appending one sentence group
// at a time to helpString.
// NOTE(review): the listing numbers jump from 46 to 50, so the return statement and
// the catch header are not visible here.
35 string ParsimonyCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
39 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
40 helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
41 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
42 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
43 helpString += "Example parsimony(random=out, iters=500).\n";
// NOTE(review): in the next statement the two consecutive double-quote characters end
// one string literal and begin another, so the concatenated text contains no quote
// characters at that point; escaped quotes were presumably intended. The sentence also
// says randomtree=out, while the other sentences name the parameter random.
44 helpString += "The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n";
45 helpString += "and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n";
46 helpString += "Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n";
// Failures are reported through m->errorOut, tagged with the class and method name.
50 m->errorOut(e, "ParsimonyCommand", "getHelpString");
55 //**********************************************************************************************************************
// Default constructor: flags the command as aborted with calledHelp set, and
// registers the two output types (parsimony, psummary) with empty file lists.
56 ParsimonyCommand::ParsimonyCommand(){
58 abort = true; calledHelp = true;
// One empty vector is reused as the initial value for both output types.
60 vector<string> tempOutNames;
61 outputTypes["parsimony"] = tempOutNames;
62 outputTypes["psummary"] = tempOutNames;
// Failures are reported through m->errorOut, tagged with the class and method name.
65 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
69 /***********************************************************/
// Builds a parsimony command from the raw option string: serves help and citation
// requests, validates the supplied parameter names, resolves input file paths and
// stores the remaining settings (random, groups, iters, processors).
// Any validation failure sets abort, which execute checks before doing work.
// NOTE(review): the listing numbers skip values throughout this constructor, so some
// braces, declarations (for example path) and the try header are not visible here.
70 ParsimonyCommand::ParsimonyCommand(string option) {
72 abort = false; calledHelp = false;
75 //allow user to run help
76 if(option == "help") { help(); abort = true; calledHelp = true; }
77 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// The names returned by setParameters are the reference list for the validity check below.
80 vector<string> myArray = setParameters();
82 OptionParser parser(option);
// This local map reuses the name parameters that setParameters pushes into; from
// here on the name refers to the parsed option map.
83 map<string, string> parameters = parser.getParameters();
84 map<string,string>::iterator it;
86 ValidParameters validParameter;
88 //check to make sure all parameters are valid for command
89 for (it = parameters.begin(); it != parameters.end(); it++) {
90 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
93 //initialize outputTypes
94 vector<string> tempOutNames;
95 outputTypes["parsimony"] = tempOutNames;
96 outputTypes["psummary"] = tempOutNames;
98 //if the user changes the input directory command factory will send this info to us in the output parameter
99 string inputDir = validParameter.validFile(parameters, "inputdir", false);
100 if (inputDir == "not found"){ inputDir = ""; }
// For each of tree, group and name: when the value has no path component
// (m->hasPath returns an empty string), prefix it with inputDir.
103 it = parameters.find("tree");
104 //user has given a tree file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["tree"] = inputDir + it->second; }
111 it = parameters.find("group");
112 //user has given a group file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["group"] = inputDir + it->second; }
119 it = parameters.find("name");
120 //user has given a name file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["name"] = inputDir + it->second; }
// Drop any tree names left in the shared m object before new input is read.
131 m->Treenames.clear();
134 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
// An empty random value means the user supplies their own tree file.
136 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
138 //are you trying to use parsimony without reading a tree or saying you want random distribution
139 if (randomtree == "") {
140 //check for required parameters
141 treefile = validParameter.validFile(parameters, "tree", true);
142 if (treefile == "not open") { treefile = ""; abort = true; }
143 else if (treefile == "not found") { //if there is a current tree file, use it
144 treefile = m->getTreeFile();
145 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
146 else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
147 }else { m->setTreeFile(treefile); }
// The group file is optional: not found leaves groupfile empty without aborting.
// Unlike treefile and namefile, the not open case keeps the sentinel text in groupfile.
149 //check for required parameters
150 groupfile = validParameter.validFile(parameters, "group", true);
151 if (groupfile == "not open") { abort = true; }
152 else if (groupfile == "not found") { groupfile = ""; }
153 else { m->setGroupFile(groupfile); }
// The name file is optional as well.
155 namefile = validParameter.validFile(parameters, "name", true);
156 if (namefile == "not open") { namefile = ""; abort = true; }
157 else if (namefile == "not found") { namefile = ""; }
158 else { m->setNameFile(namefile); }
// NOTE(review): the next statement declares a new local named outputDir, whereas the
// assignment at 134 has no declaration in this constructor and so presumably targets
// a member. If so, the tree-path fallback computed here never reaches that member
// (execute applies its own fallback) - confirm against the class declaration.
161 //if the user changes the output directory command factory will send this info to us in the output parameter
162 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(treefile); } }
164 //check for optional parameter and set defaults
165 // ...at some point we should add some additional type checking...
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; m->clearGroups(); }
// The dash-separated groups string is split into Groups and stored on m.
169 m->splitAtDash(groups, Groups);
170 m->setGroups(Groups);
// iters is kept both as text (itersString) and as the converted number (iters).
173 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
174 m->mothurConvert(itersString, iters);
// processors falls back to the value currently held by m when not supplied.
176 string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
177 m->setProcessors(temp);
178 m->mothurConvert(temp, processors);
// With no name file given, the parser is handed the tree file through getNameFile.
// NOTE(review): the return value is not captured on the visible line, and it cannot
// be seen from here whether this runs inside the randomtree check - confirm.
180 if (namefile == "") {
181 vector<string> files; files.push_back(treefile);
182 parser.getNameFile(files);
188 catch(exception& e) {
189 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
193 /***********************************************************/
// Runs the parsimony analysis. When a tree file was given (randomtree is empty) it
// reads the user trees, scores each one and then scores iters random trees; otherwise
// it scores random trees only. Score frequencies and cumulative distributions are then
// built per group combination and written out through the print helpers.
// Returns 0 when the constructor flagged help or the tree read fails, and 2 when the
// constructor flagged any other abort.
// NOTE(review): the listing numbers skip values throughout this method, so several
// braces, return statements and declarations (for example count) are not visible here.
194 int ParsimonyCommand::execute() {
197 if (abort == true) { if (calledHelp) { return 0; } return 2; }
200 //randomtree will tell us if user had their own treefile or if they just want the random distribution
201 //user has entered their own tree
202 if (randomtree == "") {
204 m->setTreeFile(treefile);
206 if (groupfile != "") {
207 //read in group map info.
208 tmap = new TreeMap(groupfile);
210 }else{ //fake out by putting everyone in one group
211 Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
212 tmap = new TreeMap();
214 for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
// The return value of readNamesFile is not inspected here.
217 if (namefile != "") { readNamesFile(); }
219 read = new ReadNewickTree(treefile);
220 int readOk = read->read(tmap);
// A nonzero read result ends the command after releasing tmap and read.
222 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
224 read->AssembleTrees();
225 T = read->getTrees();
228 //make sure all files match
229 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
231 if (namefile != "") {
232 if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
233 else { numNamesInTree = m->Treenames.size(); }
234 }else { numNamesInTree = m->Treenames.size(); }
237 //output any names that are in group file but not in tree
238 if (numNamesInTree < tmap->getNumSeqs()) {
239 for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
240 //is that name in the tree?
// Linear scan of the tree names for the current sequence name.
242 for (int j = 0; j < m->Treenames.size(); j++) {
243 if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
// On user interrupt: free the tree data and remove any output files created so far.
247 if (m->control_pressed) {
248 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
249 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
// NOTE(review): count has no visible declaration or update in this listing; it is
// presumably advanced once per non-matching tree name in the scan above - confirm.
254 //then you did not find it so report it
255 if (count == m->Treenames.size()) {
256 //if it is in your namefile then don't remove
257 map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
259 if (it == nameMap.end()) {
260 m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
261 tmap->removeSeq(tmap->namesOfSeqs[i]);
262 i--; //need this because removeSeq removes name from namesOfSeqs
// Output files for a user tree: the .parsimony column file and the .psummary file,
// placed next to the tree file when no output directory was set.
268 if(outputDir == "") { outputDir += m->hasPath(treefile); }
269 output = new ColumnFile(outputDir + m->getSimpleName(treefile) + ".parsimony", itersString);
270 outputNames.push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
271 outputTypes["parsimony"].push_back(outputDir + m->getSimpleName(treefile) + ".parsimony");
273 sumFile = outputDir + m->getSimpleName(treefile) + ".psummary";
274 m->openOutputFile(sumFile, outSum);
275 outputNames.push_back(sumFile);
276 outputTypes["psummary"].push_back(sumFile);
277 }else { //user wants random distribution
// In random mode only the column file is created, named after the random value itself.
280 if(outputDir == "") { outputDir += m->hasPath(randomtree); }
281 output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
282 outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
283 outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
286 //set users groups to analyze
287 util = new SharedUtil();
288 vector<string> mGroups = m->getGroups();
289 vector<string> tGroups = tmap->getNamesOfGroups();
290 util->setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
291 util->getCombos(groupComb, mGroups, numComp);
292 m->setGroups(mGroups);
// With a single group, one extra comparison labelled allGroups is added.
295 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
297 pars = new Parsimony(tmap);
301 reading = new Progress("Comparing to random:", iters);
303 if (m->control_pressed) {
304 delete reading; delete pars; delete output;
305 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
306 if (randomtree == "") { outSum.close(); }
307 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
// Size every per-comparison container to numComp entries before scoring.
313 //get pscore for users tree
314 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
315 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
316 rscoreFreq.resize(numComp);
317 uscoreFreq.resize(numComp);
318 rCumul.resize(numComp);
319 uCumul.resize(numComp);
320 userTreeScores.resize(numComp);
321 UScoreSig.resize(numComp);
323 if (randomtree == "") {
324 //get pscores for users trees
325 for (int i = 0; i < T.size(); i++) {
326 userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC.
328 if (m->control_pressed) {
329 delete reading; delete pars; delete output;
330 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
331 if (randomtree == "") { outSum.close(); }
332 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
338 //output scores for each combination
339 for(int k = 0; k < numComp; k++) {
// Count how many user trees produced this score for comparison k.
342 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
343 if (it == uscoreFreq[k].end()) {//new score
344 uscoreFreq[k][userData[k]] = 1;
345 }else{ uscoreFreq[k][userData[k]]++; }
// validScores collects every score seen; later loops iterate over it.
347 //add users score to valid scores
348 validScores[userData[k]] = userData[k];
350 //save score for summary file
351 userTreeScores[k].push_back(userData[k]);
355 //get pscores for random trees
356 for (int j = 0; j < iters; j++) {
358 //create new tree with same num nodes and leaves as users
359 randT = new Tree(tmap);
361 //create random relationships between nodes
362 randT->assembleRandomTree();
364 //get pscore of random tree
365 randomData = pars->getValues(randT, processors, outputDir);
367 if (m->control_pressed) {
368 delete reading; delete pars; delete output; delete randT;
369 if (randomtree == "") { outSum.close(); }
370 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
371 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
376 for(int r = 0; r < numComp; r++) {
377 //add trees pscore to map of scores
378 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
379 if (it != rscoreFreq[r].end()) {//already have that score
380 rscoreFreq[r][randomData[r]]++;
381 }else{//first time we have seen this score
382 rscoreFreq[r][randomData[r]] = 1;
385 //add randoms score to validscores
386 validScores[randomData[r]] = randomData[r];
389 //update progress bar
// Second random-tree loop. NOTE(review): the branch header is not visible in this
// listing; presumably this is the path taken when randomtree is not empty - confirm.
396 //get pscores for random trees
397 for (int j = 0; j < iters; j++) {
399 //create new tree with same num nodes and leaves as users
400 randT = new Tree(tmap);
401 //create random relationships between nodes
403 randT->assembleRandomTree();
405 if (m->control_pressed) {
406 delete reading; delete pars; delete output; delete randT;
408 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
414 //get pscore of random tree
415 randomData = pars->getValues(randT, processors, outputDir);
417 if (m->control_pressed) {
418 delete reading; delete pars; delete output; delete randT;
420 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
425 for(int r = 0; r < numComp; r++) {
426 //add trees pscore to map of scores
427 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
428 if (it != rscoreFreq[r].end()) {//already have that score
429 rscoreFreq[r][randomData[r]]++;
430 }else{//first time we have seen this score
431 rscoreFreq[r][randomData[r]] = 1;
434 //add randoms score to validscores
435 validScores[randomData[r]] = randomData[r];
438 //update progress bar
// Convert raw counts into fractions and running totals for each comparison.
445 for(int a = 0; a < numComp; a++) {
446 float rcumul = 0.0000;
447 float ucumul = 0.0000;
448 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
449 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
450 if (randomtree == "") {
451 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
// it2 refers to the entry being divided, so it2->second already holds the fraction
// of user trees when it is added to the running total.
452 //user data has that score
453 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
454 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
456 uCumul[a][it->first] = ucumul;
459 //make rscoreFreq map and rCumul
460 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
461 //get percentage of random trees with that info
462 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
463 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
464 rCumul[a][it->first] = rcumul;
467 //find the significance of each user trees score when compared to the random trees and save for printing the summary file
468 for (int h = 0; h < userTreeScores[a].size(); h++) {
469 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
473 if (m->control_pressed) {
474 delete reading; delete pars; delete output;
475 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
476 if (randomtree == "") { outSum.close(); }
477 for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
482 //finish progress bar
// The summary file is only written when user trees were scored.
487 printParsimonyFile();
488 if (randomtree == "") { printUSummaryFile(); }
490 //reset groups parameter
// Release the scorer, the column file writer, the tree map and every user tree.
493 delete pars; delete output;
494 delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
// An interrupt at this point still removes the finished output files and returns 0.
496 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
// List every output file name for the user.
498 m->mothurOutEndLine();
499 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
500 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
501 m->mothurOutEndLine();
507 catch(exception& e) {
508 m->errorOut(e, "ParsimonyCommand", "execute");
513 /***********************************************************/
// Writes the score distribution table through output: one section per group
// combination, one row per entry of validScores.
// NOTE(review): the declarations of tags and data, the else headers and any per-row
// reset of data are not visible in this listing.
514 void ParsimonyCommand::printParsimonyFile() {
// Five columns when user trees were scored, otherwise the three random-only columns.
519 if (randomtree == "") {
520 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
522 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
525 for(int a = 0; a < numComp; a++) {
// Each section is initialised with the label of its group combination.
526 output->initFile(groupComb[a], tags);
528 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
// Row values are pushed in the same order as the column tags above.
529 if (randomtree == "") {
530 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
532 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
534 output->output(data);
540 catch(exception& e) {
541 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
545 /***********************************************************/
// Writes one summary row per user tree and group combination to outSum, echoing each
// row to cout and to the log. Returns 0 early, after closing outSum, when the user
// interrupts the run.
// NOTE(review): the else header between 563 and 565 and the final return are not
// visible in this listing.
546 int ParsimonyCommand::printUSummaryFile() {
// Column header goes to the summary file and to the normal program output.
549 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
550 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
553 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
557 for (int i = 0; i< T.size(); i++) {
558 for(int a = 0; a < numComp; a++) {
559 if (m->control_pressed) { outSum.close(); return 0; }
// Values above 1/iters are printed as they are; the significance column uses a
// precision equal to the number of characters in itersString.
560 if (UScoreSig[a][i] > (1/(float)iters)) {
561 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
562 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
563 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
// Presumably the else branch: the value is reported as below 1/iters.
// NOTE(review): the file and console rows print a less-than marker before 1/iters,
// but the log row at 567 omits it - confirm whether that difference is intended.
565 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
566 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
567 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
575 catch(exception& e) {
576 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
581 /***********************************************************/
// Prompts for the number of groups and the number of sequences in each group, then
// fills a fresh TreeMap with numbered groups and sequence names and copies those names
// into m->Treenames.
// NOTE(review): the statements that read numGroups and num, and the declaration and
// update of count, are not visible in this listing.
582 void ParsimonyCommand::getUserInput() {
586 tmap = new TreeMap();
588 m->mothurOut("Please enter the number of groups you would like to analyze: ");
// The entered value is echoed to the log only.
590 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
594 numEachGroup.resize(numGroups, 0);
// Groups are named by their index, starting at 1.
597 for (int i = 1; i <= numGroups; i++) {
598 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
600 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
602 //set tmaps seqsPerGroup
603 tmap->seqsPerGroup[toString(i)] = num;
604 tmap->addGroup(toString(i));
// Each sequence is named with the text form of count and assigned to group i.
606 //set tmaps namesOfSeqs
607 for (int j = 0; j < num; j++) {
608 tmap->namesOfSeqs.push_back(toString(count));
609 tmap->treemap[toString(count)].groupname = toString(i);
614 //clears buffer so next command doesn't have error
618 m->Treenames = tmap->namesOfSeqs;
621 catch(exception& e) {
622 m->errorOut(e, "ParsimonyCommand", "getUserInput");
626 /*****************************************************************/
// Reads the name file into m->names and nameMap. Returns 1, after clearing m->names
// and blanking namefile, when a first-column name appears twice.
// NOTE(review): the stream declaration, the read loop header, any update of
// numUniquesInName and the normal return are not visible in this listing.
627 int ParsimonyCommand::readNamesFile() {
630 numUniquesInName = 0;
633 m->openInputFile(namefile, in);
635 string first, second;
636 map<string, string>::iterator itNames;
// Each record is two whitespace-separated fields; second is later split at commas.
639 in >> first >> second; m->gobble(in);
643 itNames = m->names.find(first);
644 if (itNames == m->names.end()) {
645 m->names[first] = second;
647 //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
648 vector<string> dupNames;
649 m->splitAtComma(second, dupNames);
651 for (int i = 0; i < dupNames.size(); i++) {
652 nameMap[dupNames[i]] = dupNames[i];
// Without a group file, every name after the first is added to tmap under Group1.
653 if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
// A repeated first-column name abandons the name file entirely.
655 }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
661 catch(exception& e) {
662 m->errorOut(e, "ParsimonyCommand", "readNamesFile");
666 /***********************************************************/