5 * Created by Sarah Westcott on 1/26/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "parsimonycommand.h"
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option) {
15 globaldata = GlobalData::getInstance();
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"random","groups","iters","outputdir","inputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string, string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 randomtree = validParameter.validFile(parameters, "random", false); if (randomtree == "not found") { randomtree = ""; }
39 //are you trying to use parsimony without reading a tree or saying you want random distribution
40 if (randomtree == "") {
41 if (globaldata->gTree.size() == 0) {
42 m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true; }
45 //if the user changes the output directory command factory will send this info to us in the output parameter
46 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
48 //check for optional parameter and set defaults
49 // ...at some point should added some additional type checking...
50 groups = validParameter.validFile(parameters, "groups", false);
51 if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
53 splitAtDash(groups, Groups);
54 globaldata->Groups = Groups;
57 itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; }
58 convert(itersString, iters);
61 //randomtree will tell us if user had their own treefile or if they just want the random distribution
62 //user has entered their own tree
63 if (randomtree == "") {
64 T = globaldata->gTree;
65 tmap = globaldata->gTreemap;
67 if(outputDir == "") { outputDir += hasPath(globaldata->getTreeFile()); }
68 output = new ColumnFile(outputDir + getSimpleName(globaldata->getTreeFile()) + ".parsimony", itersString);
69 outputNames.push_back(outputDir + getSimpleName(globaldata->getTreeFile()) + ".parsimony");
71 sumFile = outputDir + getSimpleName(globaldata->getTreeFile()) + ".psummary";
72 openOutputFile(sumFile, outSum);
73 outputNames.push_back(sumFile);
74 }else { //user wants random distribution
75 savetmap = globaldata->gTreemap;
78 if(outputDir == "") { outputDir += hasPath(randomtree); }
79 output = new ColumnFile(outputDir+ getSimpleName(randomtree), itersString);
80 outputNames.push_back(outputDir+ getSimpleName(randomtree));
83 //set users groups to analyze
84 util = new SharedUtil();
85 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
86 util->getCombos(groupComb, globaldata->Groups, numComp);
88 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
90 pars = new Parsimony(tmap);
99 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
104 //**********************************************************************************************************************
106 void ParsimonyCommand::help(){
108 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
109 m->mothurOut("The parsimony command parameters are random, groups and iters. No parameters are required.\n");
110 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n");
111 m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
112 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
113 m->mothurOut("Example parsimony(random=out, iters=500).\n");
114 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
115 m->mothurOut("and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
116 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
118 catch(exception& e) {
119 m->errorOut(e, "ParsimonyCommand", "help");
125 /***********************************************************/
126 int ParsimonyCommand::execute() {
129 if (abort == true) { return 0; }
132 reading = new Progress("Comparing to random:", iters);
134 //get pscore for users tree
135 userData.resize(numComp,0); //data = AB, AC, BC, ABC.
136 randomData.resize(numComp,0); //data = AB, AC, BC, ABC.
137 rscoreFreq.resize(numComp);
138 uscoreFreq.resize(numComp);
139 rCumul.resize(numComp);
140 uCumul.resize(numComp);
141 userTreeScores.resize(numComp);
142 UScoreSig.resize(numComp);
144 if (randomtree == "") {
145 //get pscores for users trees
146 for (int i = 0; i < T.size(); i++) {
147 userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC.
149 //output scores for each combination
150 for(int k = 0; k < numComp; k++) {
153 map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
154 if (it == uscoreFreq[k].end()) {//new score
155 uscoreFreq[k][userData[k]] = 1;
156 }else{ uscoreFreq[k][userData[k]]++; }
158 //add users score to valid scores
159 validScores[userData[k]] = userData[k];
161 //save score for summary file
162 userTreeScores[k].push_back(userData[k]);
166 //get pscores for random trees
167 for (int j = 0; j < iters; j++) {
168 //create new tree with same num nodes and leaves as users
171 //create random relationships between nodes
172 randT->assembleRandomTree();
174 //get pscore of random tree
175 randomData = pars->getValues(randT);
177 for(int r = 0; r < numComp; r++) {
178 //add trees pscore to map of scores
179 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
180 if (it != rscoreFreq[r].end()) {//already have that score
181 rscoreFreq[r][randomData[r]]++;
182 }else{//first time we have seen this score
183 rscoreFreq[r][randomData[r]] = 1;
186 //add randoms score to validscores
187 validScores[randomData[r]] = randomData[r];
190 //update progress bar
197 //get pscores for random trees
198 for (int j = 0; j < iters; j++) {
199 //create new tree with same num nodes and leaves as users
201 //create random relationships between nodes
203 randT->assembleRandomTree();
205 //get pscore of random tree
206 randomData = pars->getValues(randT);
208 for(int r = 0; r < numComp; r++) {
209 //add trees pscore to map of scores
210 map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
211 if (it != rscoreFreq[r].end()) {//already have that score
212 rscoreFreq[r][randomData[r]]++;
213 }else{//first time we have seen this score
214 rscoreFreq[r][randomData[r]] = 1;
217 //add randoms score to validscores
218 validScores[randomData[r]] = randomData[r];
221 //update progress bar
228 for(int a = 0; a < numComp; a++) {
229 float rcumul = 0.0000;
230 float ucumul = 0.0000;
231 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
232 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
233 if (randomtree == "") {
234 map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
235 //user data has that score
236 if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; }
237 else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
239 uCumul[a][it->first] = ucumul;
242 //make rscoreFreq map and rCumul
243 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
244 //get percentage of random trees with that info
245 if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; }
246 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
247 rCumul[a][it->first] = rcumul;
250 //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
251 for (int h = 0; h < userTreeScores[a].size(); h++) {
252 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
256 //finish progress bar
261 printParsimonyFile();
262 if (randomtree == "") { printUSummaryFile(); }
264 //reset globaldata's treemap if you just did random distrib
265 if (randomtree != "") {
266 //memory leak prevention
267 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
268 globaldata->gTreemap = savetmap;
271 //reset groups parameter
272 globaldata->Groups.clear();
274 m->mothurOutEndLine();
275 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
276 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
277 m->mothurOutEndLine();
283 catch(exception& e) {
284 m->errorOut(e, "ParsimonyCommand", "execute");
289 /***********************************************************/
290 void ParsimonyCommand::printParsimonyFile() {
295 if (randomtree == "") {
296 tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
298 tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
301 for(int a = 0; a < numComp; a++) {
302 output->initFile(groupComb[a], tags);
304 for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) {
305 if (randomtree == "") {
306 data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
308 data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
310 output->output(data);
316 catch(exception& e) {
317 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
321 /***********************************************************/
322 void ParsimonyCommand::printUSummaryFile() {
325 outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl;
326 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
329 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
333 for (int i = 0; i< T.size(); i++) {
334 for(int a = 0; a < numComp; a++) {
335 if (UScoreSig[a][i] > (1/(float)iters)) {
336 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
337 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
338 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
340 outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
341 cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
342 m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
349 catch(exception& e) {
350 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
355 /***********************************************************/
356 void ParsimonyCommand::getUserInput() {
360 tmap = new TreeMap();
362 m->mothurOut("Please enter the number of groups you would like to analyze: ");
364 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
368 numEachGroup.resize(numGroups, 0);
370 for (int i = 1; i <= numGroups; i++) {
371 m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
373 m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
375 //set tmaps seqsPerGroup
376 tmap->seqsPerGroup[toString(i)] = num;
377 tmap->namesOfGroups.push_back(toString(i));
379 //set tmaps namesOfSeqs
380 for (int j = 0; j < num; j++) {
381 tmap->namesOfSeqs.push_back(toString(count));
382 tmap->treemap[toString(count)].groupname = toString(i);
387 //clears buffer so next command doesn't have error
391 //save tmap for later
392 //memory leak prevention
393 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap; }
394 globaldata->gTreemap = tmap;
395 globaldata->Treenames = tmap->namesOfSeqs;
398 catch(exception& e) {
399 m->errorOut(e, "ParsimonyCommand", "getUserInput");
404 /***********************************************************/