]> git.donarmstrong.com Git - mothur.git/blob - parsimonycommand.cpp
bugs for 1.5
[mothur.git] / parsimonycommand.cpp
1 /*
2  *  parsimonycommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "parsimonycommand.h"
11
12 /***********************************************************/
13 ParsimonyCommand::ParsimonyCommand(string option) {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 abort = false;
17                 Groups.clear();
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"random","groups","iters"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string, string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                 
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         randomtree = validParameter.validFile(parameters, "random", false);             if (randomtree == "not found") { randomtree = ""; }
38                         
39                         //are you trying to use parsimony without reading a tree or saying you want random distribution
40                         if (randomtree == "")  {
41                                 if (globaldata->gTree.size() == 0) {
42                                         mothurOut("You must read a treefile and a groupfile or set the randomtree parameter to the output filename you wish, before you may execute the parsimony command."); mothurOutEndLine(); abort = true;  }
43                         }
44                                                 
45                         //check for optional parameter and set defaults
46                         // ...at some point should added some additional type checking...
47                         groups = validParameter.validFile(parameters, "groups", false);                 
48                         if (groups == "not found") { groups = ""; globaldata->Groups.clear(); }
49                         else { 
50                                 splitAtDash(groups, Groups);
51                                 globaldata->Groups = Groups;
52                         }
53                                 
54                         itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
55                         convert(itersString, iters); 
56                                                 
57                         if (abort == false) {
58                                 //randomtree will tell us if user had their own treefile or if they just want the random distribution
59                                 //user has entered their own tree
60                                 if (randomtree == "") { 
61                                         T = globaldata->gTree;
62                                         tmap = globaldata->gTreemap;
63                                         output = new ColumnFile(globaldata->getTreeFile()  +  ".parsimony", itersString);
64                                         sumFile = globaldata->getTreeFile() + ".psummary";
65                                         openOutputFile(sumFile, outSum);
66                                 }else { //user wants random distribution
67                                         savetmap = globaldata->gTreemap;
68                                         getUserInput();
69                                         output = new ColumnFile(randomtree, itersString);
70                                 }
71                                 
72                                 //set users groups to analyze
73                                 util = new SharedUtil();
74                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
75                                 util->getCombos(groupComb, globaldata->Groups, numComp);
76                                 
77                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
78                                 
79                                 pars = new Parsimony(tmap);
80                                 counter = 0;
81                                 
82                         }
83                         
84                 }
85
86         }
87         catch(exception& e) {
88                 errorOut(e, "ParsimonyCommand", "ParsimonyCommand");
89                 exit(1);
90         }
91 }
92
93 //**********************************************************************************************************************
94
95 void ParsimonyCommand::help(){
96         try {
97                 mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
98                 mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
99                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
100                 mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
101                 mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
102                 mothurOut("Example parsimony(random=out, iters=500).\n");
103                 mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
104                 mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
105                 mothurOut("Note: No spaces between parameter labels (i.e. random), '=' and parameters (i.e.yourOutputFilename).\n\n");
106         }
107         catch(exception& e) {
108                 errorOut(e, "ParsimonyCommand", "help");
109                 exit(1);
110         }
111 }
112
113
114 /***********************************************************/
115 int ParsimonyCommand::execute() {
116         try {
117         
118                 if (abort == true) { return 0; }
119         
120                 Progress* reading;
121                 reading = new Progress("Comparing to random:", iters);
122                 
123                 //get pscore for users tree
124                 userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
125                 randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
126                 rscoreFreq.resize(numComp);  
127                 uscoreFreq.resize(numComp);  
128                 rCumul.resize(numComp);  
129                 uCumul.resize(numComp);  
130                 userTreeScores.resize(numComp);  
131                 UScoreSig.resize(numComp); 
132                                 
133                 if (randomtree == "") {
134                         //get pscores for users trees
135                         for (int i = 0; i < T.size(); i++) {
136                                 userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
137
138                                 //output scores for each combination
139                                 for(int k = 0; k < numComp; k++) {
140
141                                         //update uscoreFreq
142                                         map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
143                                         if (it == uscoreFreq[k].end()) {//new score
144                                                 uscoreFreq[k][userData[k]] = 1;
145                                         }else{ uscoreFreq[k][userData[k]]++; }
146                                         
147                                         //add users score to valid scores
148                                         validScores[userData[k]] = userData[k];
149                                         
150                                         //save score for summary file
151                                         userTreeScores[k].push_back(userData[k]);
152                                 }
153                         }
154                         
155                         //get pscores for random trees
156                         for (int j = 0; j < iters; j++) {
157                                 //create new tree with same num nodes and leaves as users
158                                 randT = new Tree();
159
160                                 //create random relationships between nodes
161                                 randT->assembleRandomTree();
162
163                                 //get pscore of random tree
164                                 randomData = pars->getValues(randT);
165                                         
166                                 for(int r = 0; r < numComp; r++) {
167                                         //add trees pscore to map of scores
168                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
169                                         if (it != rscoreFreq[r].end()) {//already have that score
170                                                 rscoreFreq[r][randomData[r]]++;
171                                         }else{//first time we have seen this score
172                                                 rscoreFreq[r][randomData[r]] = 1;
173                                         }
174                         
175                                         //add randoms score to validscores
176                                         validScores[randomData[r]] = randomData[r];
177                                 }
178                                 
179                                 //update progress bar
180                                 reading->update(j);
181                                 
182                                 delete randT;
183                         }
184
185                 }else {
186                         //get pscores for random trees
187                         for (int j = 0; j < iters; j++) {
188                                 //create new tree with same num nodes and leaves as users
189                                 randT = new Tree();
190                                 //create random relationships between nodes
191
192                                 randT->assembleRandomTree();
193
194                                 //get pscore of random tree
195                                 randomData = pars->getValues(randT);
196                         
197                                 for(int r = 0; r < numComp; r++) {
198                                         //add trees pscore to map of scores
199                                         map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
200                                         if (it != rscoreFreq[r].end()) {//already have that score
201                                                 rscoreFreq[r][randomData[r]]++;
202                                         }else{//first time we have seen this score
203                                                 rscoreFreq[r][randomData[r]] = 1;
204                                         }
205                         
206                                         //add randoms score to validscores
207                                         validScores[randomData[r]] = randomData[r];
208                                 }
209                                 
210                                 //update progress bar
211                                 reading->update(j);
212                                 
213                                 delete randT;
214                         }
215                 }
216
217                 for(int a = 0; a < numComp; a++) {
218                         float rcumul = 0.0000;
219                         float ucumul = 0.0000;
220                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
221                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
222                                 if (randomtree == "") {
223                                         map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
224                                         //user data has that score 
225                                         if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
226                                         else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
227                                         //make uCumul map
228                                         uCumul[a][it->first] = ucumul;
229                                 }
230                         
231                                 //make rscoreFreq map and rCumul
232                                 map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
233                                 //get percentage of random trees with that info
234                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
235                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
236                                 rCumul[a][it->first] = rcumul;
237                         }
238                         
239                         //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
240                         for (int h = 0; h < userTreeScores[a].size(); h++) {
241                                 UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
242                         }
243                 }
244                 
245                 //finish progress bar
246                 reading->finish();
247                 delete reading;
248
249                 
250                 printParsimonyFile();
251                 if (randomtree == "") { printUSummaryFile(); }
252                 
253                 //reset globaldata's treemap if you just did random distrib
254                 if (randomtree != "") {
255                         //memory leak prevention
256                         //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
257                         globaldata->gTreemap = savetmap;
258                 }
259                 
260                 //reset groups parameter
261                 globaldata->Groups.clear(); 
262                 
263                 return 0;
264                 
265         }
266         catch(exception& e) {
267                 errorOut(e, "ParsimonyCommand", "execute");
268                 exit(1);
269         }
270 }
271
272 /***********************************************************/
273 void ParsimonyCommand::printParsimonyFile() {
274         try {
275                 vector<double> data;
276                 vector<string> tags;
277                 
278                 if (randomtree == "") {
279                         tags.push_back("Score"); tags.push_back("UserFreq"); tags.push_back("UserCumul"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
280                 }else {
281                         tags.push_back("Score"); tags.push_back("RandFreq"); tags.push_back("RandCumul");
282                 }
283
284                 for(int a = 0; a < numComp; a++) {
285                         output->initFile(groupComb[a], tags);
286                         //print each line
287                         for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
288                                 if (randomtree == "") {
289                                         data.push_back(it->first);  data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
290                                 }else{
291                                         data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); 
292                                 }
293                                 output->output(data);
294                                 data.clear();
295                         } 
296                         output->resetFile();
297                 }
298         }
299         catch(exception& e) {
300                 errorOut(e, "ParsimonyCommand", "printParsimonyFile");
301                 exit(1);
302         }
303 }
304 /***********************************************************/
305 void ParsimonyCommand::printUSummaryFile() {
306         try {
307                 //column headers
308                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "ParsScore" << '\t' << "ParsSig" <<  endl;
309                 mothurOut("Tree#\tGroups\tParsScore\tParsSig"); mothurOutEndLine();
310                 
311                 //format output
312                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
313                 
314                 
315                 //print each line
316                 for (int i = 0; i< T.size(); i++) {
317                         for(int a = 0; a < numComp; a++) {
318                                 if (UScoreSig[a][i] > (1/(float)iters)) {
319                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
320                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a]  << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << UScoreSig[a][i] << endl;
321                                         mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString(UScoreSig[a][i])); mothurOutEndLine();
322                                 }else {
323                                         outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length())  << '\t' << "<" << (1/float(iters)) << endl;
324                                         cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(itersString.length()) << '\t' << "<" << (1/float(iters)) << endl;
325                                         mothurOutJustToLog(toString(i+1) + "\t" + groupComb[a] + "\t" + toString(userTreeScores[a][i]) + "\t" + toString((1/float(iters)))); mothurOutEndLine();
326                                 }
327                         }
328                 }
329                 
330                 outSum.close();
331         }
332         catch(exception& e) {
333                 errorOut(e, "ParsimonyCommand", "printUSummaryFile");
334                 exit(1);
335         }
336 }
337
338 /***********************************************************/
339 void ParsimonyCommand::getUserInput() {
340         try {
341         
342                 //create treemap
343                 tmap = new TreeMap();
344
345                 mothurOut("Please enter the number of groups you would like to analyze: ");
346                 cin >> numGroups;
347                 mothurOutJustToLog(toString(numGroups)); mothurOutEndLine();
348                                 
349                 int num, count;
350                 count = 1;
351                 numEachGroup.resize(numGroups, 0);  
352                 
353                 for (int i = 1; i <= numGroups; i++) {
354                         mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
355                         cin >> num;
356                         mothurOutJustToLog(toString(num)); mothurOutEndLine();
357                                 
358                         //set tmaps seqsPerGroup
359                         tmap->seqsPerGroup[toString(i)] = num;
360                         tmap->namesOfGroups.push_back(toString(i));
361                         
362                         //set tmaps namesOfSeqs
363                         for (int j = 0; j < num; j++) {
364                                 tmap->namesOfSeqs.push_back(toString(count));
365                                 tmap->treemap[toString(count)].groupname = toString(i);
366                                 count++;
367                         }
368                 }
369                 
370                 //clears buffer so next command doesn't have error
371                 string s;       
372                 getline(cin, s);
373                 
374                 //save tmap for later
375                 //memory leak prevention
376                 //if (globaldata->gTreemap != NULL) { delete globaldata->gTreemap;  }
377                 globaldata->gTreemap = tmap;
378                 globaldata->Treenames = tmap->namesOfSeqs; 
379                 
380         }
381         catch(exception& e) {
382                 errorOut(e, "ParsimonyCommand", "getUserInput");
383                 exit(1);
384         }
385 }
386
387 /***********************************************************/
388
389