5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option) {
15 globaldata = GlobalData::getInstance();
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"random","groups","iters","outputdir","inputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
39 //are you trying to use parsimony without reading a tree or saying you want random distribution
40 if (randomtree == "") {
41 if (globaldata->gTree.size() == 0) {
42 m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true; }
45 //if the user changes the output directory command factory will send this info to us in the output parameter
46 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
48 //check for optional parameter and set defaults
49 // ...at some point should added some additional type checking...
50 groups = validParameter.validFile(parameters, "groups", false);
51 if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
53 splitAtDash(groups, Groups);
54 globaldata->Groups = Groups;
57 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
58 convert(itersString, iters);
61 //randomtree will tell us if user had their own treefile or if they just want the random distribution
62 //user has entered their own tree
63 if (randomtree == "") {
64 T = globaldata->gTree;
65 tmap = globaldata->gTreemap;
67 if(outputDir == "") { outputDir += hasPath(globaldata->getTreeFile()); }
68 output = new ColumnFile(outputDir + getSimpleName(globaldata->getTreeFile()) + ".parsimony", itersString);
69 outputNames.push_back(outputDir + getSimpleName(globaldata->getTreeFile()) + ".parsimony");
71 sumFile = outputDir + getSimpleName(globaldata->getTreeFile()) + ".psummary";
72 openOutputFile(sumFile, outSum);
73 outputNames.push_back(sumFile);
74 }else { //user wants random distribution
75 savetmap = globaldata->gTreemap;
78 if(outputDir == "") { outputDir += hasPath(randomtree); }
79 output = new ColumnFile(outputDir+ getSimpleName(randomtree), itersString);
80 outputNames.push_back(outputDir+ getSimpleName(randomtree));
83 //set users groups to analyze
84 util = new SharedUtil();
85 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
86 util->getCombos(groupComb, globaldata->Groups, numComp);
88 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
90 pars = new Parsimony(tmap);
99 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
104 //**********************************************************************************************************************
106 void ParsimonyCommand::help(){
108 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
109 m->mothurOut("The parsimony command parameters are random, groups and iters. No parameters are required.\n");
110 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n");
111 m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
112 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
113 m->mothurOut("Example parsimony(random=out, iters=500).\n");
114 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
115 m->mothurOut("and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
116 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
118 catch(exception& e) {
119 m->errorOut(e, "ParsimonyCommand", "help");
125 /***********************************************************/
126 int ParsimonyCommand::execute() {
129 if (abort == true) { return 0; }
132 reading = new Progress("Comparing to random:", iters);
134 if (m->control_pressed) {
135 delete reading; delete pars; delete util; delete output;
136 if (randomtree == "") { outSum.close(); }
137 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
138 globaldata->Groups.clear();
143 //get pscore for users tree
144 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
145 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
146 rscoreFreq.resize(numComp);
147 uscoreFreq.resize(numComp);
148 rCumul.resize(numComp);
149 uCumul.resize(numComp);
150 userTreeScores.resize(numComp);
151 UScoreSig.resize(numComp);
153 if (randomtree == "") {
154 //get pscores for users trees
155 for (int i = 0; i < T.size(); i++) {
156 userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC.
158 if (m->control_pressed) {
159 delete reading; delete pars; delete util; delete output;
160 if (randomtree == "") { outSum.close(); }
161 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
162 globaldata->Groups.clear();
167 //output scores for each combination
168 for(int k = 0; k < numComp; k++) {
171 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
172 if (it == uscoreFreq[k].end()) {//new score
173 uscoreFreq[k][userData[k]] = 1;
174 }else{ uscoreFreq[k][userData[k]]++; }
176 //add users score to valid scores
177 validScores[userData[k]] = userData[k];
179 //save score for summary file
180 userTreeScores[k].push_back(userData[k]);
184 //get pscores for random trees
185 for (int j = 0; j < iters; j++) {
187 //create new tree with same num nodes and leaves as users
190 //create random relationships between nodes
191 randT->assembleRandomTree();
193 //get pscore of random tree
194 randomData = pars->getValues(randT);
196 if (m->control_pressed) {
197 delete reading; delete pars; delete util; delete output; delete randT;
198 if (randomtree == "") { outSum.close(); }
199 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
200 globaldata->Groups.clear();
204 for(int r = 0; r < numComp; r++) {
205 //add trees pscore to map of scores
206 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
207 if (it != rscoreFreq[r].end()) {//already have that score
208 rscoreFreq[r][randomData[r]]++;
209 }else{//first time we have seen this score
210 rscoreFreq[r][randomData[r]] = 1;
213 //add randoms score to validscores
214 validScores[randomData[r]] = randomData[r];
217 //update progress bar
224 //get pscores for random trees
225 for (int j = 0; j < iters; j++) {
227 //create new tree with same num nodes and leaves as users
229 //create random relationships between nodes
231 randT->assembleRandomTree();
233 if (m->control_pressed) {
234 delete reading; delete pars; delete util; delete output; delete randT;
235 globaldata->gTreemap = savetmap;
236 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
237 globaldata->Groups.clear();
242 //get pscore of random tree
243 randomData = pars->getValues(randT);
245 if (m->control_pressed) {
246 delete reading; delete pars; delete util; delete output; delete randT;
247 globaldata->gTreemap = savetmap;
248 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
249 globaldata->Groups.clear();
253 for(int r = 0; r < numComp; r++) {
254 //add trees pscore to map of scores
255 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
256 if (it != rscoreFreq[r].end()) {//already have that score
257 rscoreFreq[r][randomData[r]]++;
258 }else{//first time we have seen this score
259 rscoreFreq[r][randomData[r]] = 1;
262 //add randoms score to validscores
263 validScores[randomData[r]] = randomData[r];
266 //update progress bar
273 for(int a = 0; a < numComp; a++) {
274 float rcumul = 0.0000;
275 float ucumul = 0.0000;
276 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
277 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
278 if (randomtree == "") {
279 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
280 //user data has that score
281 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
282 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
284 uCumul[a][it->first] = ucumul;
287 //make rscoreFreq map and rCumul
288 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
289 //get percentage of random trees with that info
290 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
291 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
292 rCumul[a][it->first] = rcumul;
295 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
296 for (int h = 0; h < userTreeScores[a].size(); h++) {
297 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
301 if (m->control_pressed) {
302 delete reading; delete pars; delete util; delete output;
303 if (randomtree == "") { outSum.close(); }
304 else { globaldata->gTreemap = savetmap; }
305 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
306 globaldata->Groups.clear();
310 //finish progress bar
315 printParsimonyFile();
316 if (randomtree == "") { printUSummaryFile(); }
318 //reset globaldata's treemap if you just did random distrib
319 if (randomtree != "") {
320 //memory leak prevention
321 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
322 globaldata->gTreemap = savetmap;
325 //reset groups parameter
326 globaldata->Groups.clear();
328 if (m->control_pressed) {
329 delete pars; delete util; delete output;
330 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
334 m->mothurOutEndLine();
335 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
336 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
337 m->mothurOutEndLine();
343 catch(exception& e) {
344 m->errorOut(e, "ParsimonyCommand", "execute");
349 /***********************************************************/
350 void ParsimonyCommand::printParsimonyFile() {
355 if (randomtree == "") {
356 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
358 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
361 for(int a = 0; a < numComp; a++) {
362 output->initFile(groupComb[a], tags);
364 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
365 if (randomtree == "") {
366 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
368 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
370 output->output(data);
376 catch(exception& e) {
377 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
381 /***********************************************************/
382 int ParsimonyCommand::printUSummaryFile() {
385 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
386 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
389 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
393 for (int i = 0; i< T.size(); i++) {
394 for(int a = 0; a < numComp; a++) {
395 if (m->control_pressed) { outSum.close(); return 0; }
396 if (UScoreSig[a][i] > (1/(float)iters)) {
397 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
398 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
399 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
401 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
402 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
403 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
411 catch(exception& e) {
412 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
417 /***********************************************************/
418 void ParsimonyCommand::getUserInput() {
422 tmap = new TreeMap();
424 m->mothurOut("Please enter the number of groups you would like to analyze: ");
426 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
430 numEachGroup.resize(numGroups, 0);
432 for (int i = 1; i <= numGroups; i++) {
433 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
435 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
437 //set tmaps seqsPerGroup
438 tmap->seqsPerGroup[toString(i)] = num;
439 tmap->namesOfGroups.push_back(toString(i));
441 //set tmaps namesOfSeqs
442 for (int j = 0; j < num; j++) {
443 tmap->namesOfSeqs.push_back(toString(count));
444 tmap->treemap[toString(count)].groupname = toString(i);
449 //clears buffer so next command doesn't have error
453 //save tmap for later
454 //memory leak prevention
455 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
456 globaldata->gTreemap = tmap;
457 globaldata->Treenames = tmap->namesOfSeqs;
460 catch(exception& e) {
461 m->errorOut(e, "ParsimonyCommand", "getUserInput");
466 /***********************************************************/