]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
created mothurOut class to handle logfiles
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option)  {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 Groups.clear();
18                         
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"random","groups","iters","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
38                         
39                         //are you trying to use parsimony without reading a tree or saying you want random distribution
40                         if (randomtree == "")  {
41                                 if (globaldata->gTree.size() == 0) {
42                                         m->mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); m->mothurOutEndLine(); abort = true;  }
43                         }
44                         
45                         //if the user changes the output directory command factory will send this info to us in the output parameter 
46                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; }
47                         
48                         //check for optional parameter and set defaults
49                         // ...at some point should added some additional type checking...
50                         groups = validParameter.validFile(parameters, "groups", false);                 
51                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
52                         else { 
53                                 splitAtDash(groups, Groups);
54                                 globaldata->Groups = Groups;
55                         }
56                                 
57                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
58                         convert(itersString, iters); 
59                                                 
60                         if (abort == false) {
61                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
62                                 //user has entered their own tree
63                                 if (randomtree == "") { 
64                                         T = globaldata->gTree;
65                                         tmap = globaldata->gTreemap;
66                                         
67                                         if(outputDir == "") { outputDir += hasPath(globaldata->getTreeFile()); }
68                                         output = new ColumnFile(outputDir + getSimpleName(globaldata->getTreeFile())  +  ".parsimony", itersString);
69                                         outputNames.push_back(outputDir + getSimpleName(globaldata->getTreeFile())  +  ".parsimony");
70                                         
71                                         sumFile = outputDir + getSimpleName(globaldata->getTreeFile()) + ".psummary";
72                                         openOutputFile(sumFile, outSum);
73                                         outputNames.push_back(sumFile);
74                                 }else { //user wants random distribution
75                                         savetmap = globaldata->gTreemap;
76                                         getUserInput();
77                                         
78                                         if(outputDir == "") { outputDir += hasPath(randomtree); }
79                                         output = new ColumnFile(outputDir+ getSimpleName(randomtree), itersString);
80                                         outputNames.push_back(outputDir+ getSimpleName(randomtree));
81                                 }
82                                 
83                                 //set users groups to analyze
84                                 util = new SharedUtil();
85                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
86                                 util->getCombos(groupComb, globaldata->Groups, numComp);
87                                 
88                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
89                                 
90                                 pars = new Parsimony(tmap);
91                                 counter = 0;
92                                 
93                         }
94                         
95                 }
96
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
100                 exit(1);
101         }
102 }
103
104 //**********************************************************************************************************************
105
106 void ParsimonyCommand::help(){
107         try {
108                 m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
109                 m->mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
110                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
111                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
112                 m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
113                 m->mothurOut("Example parsimony(random=out, iters=500).\n");
114                 m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
115                 m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
116                 m->mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "ParsimonyCommand", "help");
120                 exit(1);
121         }
122 }
123
124
125 /***********************************************************/
126 int ParsimonyCommand::execute() {
127         try {
128         
129                 if (abort == true) { return 0; }
130         
131                 Progress* reading;
132                 reading = new Progress("Comparing to random:", iters);
133                 
134                 //get pscore for users tree
135                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
136                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
137                 rscoreFreq.resize(numComp);  
138                 uscoreFreq.resize(numComp);  
139                 rCumul.resize(numComp);  
140                 uCumul.resize(numComp);  
141                 userTreeScores.resize(numComp);  
142                 UScoreSig.resize(numComp); 
143                                 
144                 if (randomtree == "") {
145                         //get pscores for users trees
146                         for (int i = 0; i < T.size(); i++) {
147                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
148
149                                 //output scores for each combination
150                                 for(int k = 0; k < numComp; k++) {
151
152                                         //update uscoreFreq
153                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
154                                         if (it == uscoreFreq[k].end()) {//new score
155                                                 uscoreFreq[k][userData[k]] = 1;
156                                         }else{ uscoreFreq[k][userData[k]]++; }
157                                         
158                                         //add users score to valid scores
159                                         validScores[userData[k]] = userData[k];
160                                         
161                                         //save score for summary file
162                                         userTreeScores[k].push_back(userData[k]);
163                                 }
164                         }
165                         
166                         //get pscores for random trees
167                         for (int j = 0; j < iters; j++) {
168                                 //create new tree with same num nodes and leaves as users
169                                 randT = new Tree();
170
171                                 //create random relationships between nodes
172                                 randT->assembleRandomTree();
173
174                                 //get pscore of random tree
175                                 randomData = pars->getValues(randT);
176                                         
177                                 for(int r = 0; r < numComp; r++) {
178                                         //add trees pscore to map of scores
179                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
180                                         if (it != rscoreFreq[r].end()) {//already have that score
181                                                 rscoreFreq[r][randomData[r]]++;
182                                         }else{//first time we have seen this score
183                                                 rscoreFreq[r][randomData[r]] = 1;
184                                         }
185                         
186                                         //add randoms score to validscores
187                                         validScores[randomData[r]] = randomData[r];
188                                 }
189                                 
190                                 //update progress bar
191                                 reading->update(j);
192                                 
193                                 delete randT;
194                         }
195
196                 }else {
197                         //get pscores for random trees
198                         for (int j = 0; j < iters; j++) {
199                                 //create new tree with same num nodes and leaves as users
200                                 randT = new Tree();
201                                 //create random relationships between nodes
202
203                                 randT->assembleRandomTree();
204
205                                 //get pscore of random tree
206                                 randomData = pars->getValues(randT);
207                         
208                                 for(int r = 0; r < numComp; r++) {
209                                         //add trees pscore to map of scores
210                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
211                                         if (it != rscoreFreq[r].end()) {//already have that score
212                                                 rscoreFreq[r][randomData[r]]++;
213                                         }else{//first time we have seen this score
214                                                 rscoreFreq[r][randomData[r]] = 1;
215                                         }
216                         
217                                         //add randoms score to validscores
218                                         validScores[randomData[r]] = randomData[r];
219                                 }
220                                 
221                                 //update progress bar
222                                 reading->update(j);
223                                 
224                                 delete randT;
225                         }
226                 }
227
228                 for(int a = 0; a < numComp; a++) {
229                         float rcumul = 0.0000;
230                         float ucumul = 0.0000;
231                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
232                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
233                                 if (randomtree == "") {
234                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
235                                         //user data has that score 
236                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
237                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
238                                         //make uCumul map
239                                         uCumul[a][it->first] = ucumul;
240                                 }
241                         
242                                 //make rscoreFreq map and rCumul
243                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
244                                 //get percentage of random trees with that info
245                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
246                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
247                                 rCumul[a][it->first] = rcumul;
248                         }
249                         
250                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
251                         for (int h = 0; h < userTreeScores[a].size(); h++) {
252                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
253                         }
254                 }
255                 
256                 //finish progress bar
257                 reading->finish();
258                 delete reading;
259
260                 
261                 printParsimonyFile();
262                 if (randomtree == "") { printUSummaryFile(); }
263                 
264                 //reset globaldata's treemap if you just did random distrib
265                 if (randomtree != "") {
266                         //memory leak prevention
267                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
268                         globaldata->gTreemap = savetmap;
269                 }
270                 
271                 //reset groups parameter
272                 globaldata->Groups.clear(); 
273                 
274                 m->mothurOutEndLine();
275                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
276                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
277                 m->mothurOutEndLine();
278
279                 
280                 return 0;
281                 
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "ParsimonyCommand", "execute");
285                 exit(1);
286         }
287 }
288
289 /***********************************************************/
290 void ParsimonyCommand::printParsimonyFile() {
291         try {
292                 vector<double> data;
293                 vector<string> tags;
294                 
295                 if (randomtree == "") {
296                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
297                 }else {
298                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
299                 }
300
301                 for(int a = 0; a < numComp; a++) {
302                         output->initFile(groupComb[a], tags);
303                         //print each line
304                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
305                                 if (randomtree == "") {
306                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
307                                 }else{
308                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
309                                 }
310                                 output->output(data);
311                                 data.clear();
312                         } 
313                         output->resetFile();
314                 }
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "ParsimonyCommand", "printParsimonyFile");
318                 exit(1);
319         }
320 }
321 /***********************************************************/
322 void ParsimonyCommand::printUSummaryFile() {
323         try {
324                 //column headers
325                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
326                 m->mothurOut("Tree#\tGroups\tParsScore\tParsSig"); m->mothurOutEndLine();
327                 
328                 //format output
329                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
330                 
331                 
332                 //print each line
333                 for (int i = 0; i< T.size(); i++) {
334                         for(int a = 0; a < numComp; a++) {
335                                 if (UScoreSig[a][i] > (1/(float)iters)) {
336                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
337                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
338                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); m->mothurOutEndLine();
339                                 }else {
340                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
341                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
342                                         m->mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); m->mothurOutEndLine();
343                                 }
344                         }
345                 }
346                 
347                 outSum.close();
348         }
349         catch(exception& e) {
350                 m->errorOut(e, "ParsimonyCommand", "printUSummaryFile");
351                 exit(1);
352         }
353 }
354
355 /***********************************************************/
356 void ParsimonyCommand::getUserInput() {
357         try {
358         
359                 //create treemap
360                 tmap = new TreeMap();
361
362                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
363                 cin >> numGroups;
364                 m->mothurOutJustToLog(toString(numGroups)); m->mothurOutEndLine();
365                                 
366                 int num, count;
367                 count = 1;
368                 numEachGroup.resize(numGroups, 0);  
369                 
370                 for (int i = 1; i <= numGroups; i++) {
371                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
372                         cin >> num;
373                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
374                                 
375                         //set tmaps seqsPerGroup
376                         tmap->seqsPerGroup[toString(i)] = num;
377                         tmap->namesOfGroups.push_back(toString(i));
378                         
379                         //set tmaps namesOfSeqs
380                         for (int j = 0; j < num; j++) {
381                                 tmap->namesOfSeqs.push_back(toString(count));
382                                 tmap->treemap[toString(count)].groupname = toString(i);
383                                 count++;
384                         }
385                 }
386                 
387                 //clears buffer so next command doesn't have error
388                 string s;       
389                 getline(cin, s);
390                 
391                 //save tmap for later
392                 //memory leak prevention
393                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
394                 globaldata->gTreemap = tmap;
395                 globaldata->Treenames = tmap->namesOfSeqs; 
396                 
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
400                 exit(1);
401         }
402 }
403
404 /***********************************************************/
405
406