]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> UnifracUnweightedCommand::getValidParameters(){  
14         try {
15                 string Array[] =  {"groups","iters","distance","random","root", "processors","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "UnifracUnweightedCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 UnifracUnweightedCommand::UnifracUnweightedCommand(){   
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = true; calledHelp = true; 
29                 vector<string> tempOutNames;
30                 outputTypes["unweighted"] = tempOutNames;
31                 outputTypes["uwsummary"] = tempOutNames;
32                 outputTypes["phylip"] = tempOutNames;
33                 outputTypes["column"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> UnifracUnweightedCommand::getRequiredParameters(){       
42         try {
43                 vector<string> myArray;
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "UnifracUnweightedCommand", "getRequiredParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 vector<string> UnifracUnweightedCommand::getRequiredFiles(){    
53         try {
54                 string Array[] =  {"tree","group"};
55                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "UnifracUnweightedCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 /***********************************************************/
65 UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
66         try {
67                 globaldata = GlobalData::getInstance();
68                 abort = false; calledHelp = false;   
69                 Groups.clear();
70                         
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; calledHelp = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string Array[] =  {"groups","iters","distance","random","root", "processors","outputdir","inputdir"};
77                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                 
84                         //check to make sure all parameters are valid for command
85                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["unweighted"] = tempOutNames;
92                         outputTypes["uwsummary"] = tempOutNames;
93                         outputTypes["phylip"] = tempOutNames;
94                         outputTypes["column"] = tempOutNames;
95                         
96                         if (globaldata->gTree.size() == 0) {//no trees were read
97                                 m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.unweighted command."); m->mothurOutEndLine(); abort = true;  }
98                         
99                         //if the user changes the output directory command factory will send this info to us in the output parameter 
100                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
101                                 outputDir = ""; 
102                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
103                         }
104                                                         
105                         //check for optional parameter and set defaults
106                         // ...at some point should added some additional type checking...
107                         groups = validParameter.validFile(parameters, "groups", false);                 
108                         if (groups == "not found") { groups = ""; }
109                         else { 
110                                 m->splitAtDash(groups, Groups);
111                                 globaldata->Groups = Groups;
112                         }
113                                 
114                         itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
115                         convert(itersString, iters); 
116                         
117                         string temp = validParameter.validFile(parameters, "distance", false);                  
118                         if (temp == "not found") { phylip = false; outputForm = ""; }
119                         else{
120                                 if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
121                                 else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
122                         }
123                         
124                         temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
125                         random = m->isTrue(temp);
126                         
127                         temp = validParameter.validFile(parameters, "root", false);                                     if (temp == "not found") { temp = "F"; }
128                         includeRoot = m->isTrue(temp);
129                         
130                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
131                         convert(temp, processors); 
132                         
133                         if (!random) {  iters = 0;  } //turn off random calcs
134                         
135                         //if user selects distance = true and no groups it won't calc the pairwise
136                         if ((phylip) && (Groups.size() == 0)) {
137                                 groups = "all";
138                                 m->splitAtDash(groups, Groups);
139                                 globaldata->Groups = Groups;
140                         }
141                 
142                         if (abort == false) {
143                                 T = globaldata->gTree;
144                                 tmap = globaldata->gTreemap;
145                                 sumFile = outputDir + m->getSimpleName(globaldata->getTreeFile()) + ".uwsummary";
146                                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
147                                 m->openOutputFile(sumFile, outSum);
148                                 
149                                 util = new SharedUtil();
150                                 util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted");   //sets the groups the user wants to analyze
151                                 util->getCombos(groupComb, globaldata->Groups, numComp);
152                                 
153                                 if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
154                                 
155                                 unweighted = new Unweighted(tmap, includeRoot);
156                                 
157                         }
158                         
159                 }
160                 
161         }
162         catch(exception& e) {
163                 m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
164                 exit(1);
165         }
166 }
167
168 //**********************************************************************************************************************
169
170 void UnifracUnweightedCommand::help(){
171         try {
172                 m->mothurOut("The unifrac.unweighted command can only be executed after a successful read.tree command.\n");
173                 m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors, root and random.  No parameters are required.\n");
174                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
175                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
176                 m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n");
177                 m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n");
178                 m->mothurOut("The root parameter allows you to include the entire root in your calculations. The default is false, meaning stop at the root for this comparision instead of the root of the entire tree.\n");
179                 m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
180                 m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n");
181                 m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n");
182                 m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
183                 m->mothurOut("The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual.\n");
184                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
185         }
186         catch(exception& e) {
187                 m->errorOut(e, "UnifracUnweightedCommand", "help");
188                 exit(1);
189         }
190 }
191
192
193 /***********************************************************/
194 int UnifracUnweightedCommand::execute() {
195         try {
196                 
197                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
198                 
199                 int start = time(NULL);
200                 
201                 userData.resize(numComp,0);  //data[0] = unweightedscore 
202                 randomData.resize(numComp,0); //data[0] = unweightedscore
203                 //create new tree with same num nodes and leaves as users
204                 
205                 if (numComp < processors) { processors = numComp;  }
206                 
207                 outSum << "Tree#" << '\t' << "Groups" << '\t'  <<  "UWScore" <<'\t' << "UWSig" <<  endl;
208                 m->mothurOut("Tree#\tGroups\tUWScore\tUWSig"); m->mothurOutEndLine();
209                 
210                 //get pscores for users trees
211                 for (int i = 0; i < T.size(); i++) {
212                         if (m->control_pressed) { 
213                                 outSum.close();
214                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
215                                 return 0; 
216                         }
217                         
218                         counter = 0;
219                         
220                         if (random)  {  
221                                 output = new ColumnFile(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted", itersString);
222                                 outputNames.push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted");
223                                 outputTypes["unweighted"].push_back(outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted");
224                         }
225                         
226                         
227                         //get unweighted for users tree
228                         rscoreFreq.resize(numComp);  
229                         rCumul.resize(numComp);  
230                         utreeScores.resize(numComp);  
231                         UWScoreSig.resize(numComp); 
232
233                         userData = unweighted->getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
234                 
235                         if (m->control_pressed) { if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());  }return 0; }
236                         
237                         //output scores for each combination
238                         for(int k = 0; k < numComp; k++) {
239                                 //saves users score
240                                 utreeScores[k].push_back(userData[k]);
241                                 
242                                 //add users score to validscores
243                                 validScores[userData[k]] = userData[k];
244                         }
245                 
246                         //get unweighted scores for random trees - if random is false iters = 0
247                         for (int j = 0; j < iters; j++) {
248                 
249                                 //we need a different getValues because when we swap the labels we only want to swap those in each pairwise comparison
250                                 randomData = unweighted->getValues(T[i], "", "", processors, outputDir);
251                                 
252                                 if (m->control_pressed) { if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
253                         
254                                 for(int k = 0; k < numComp; k++) {      
255                                         //add trees unweighted score to map of scores
256                                         map<float,float>::iterator it = rscoreFreq[k].find(randomData[k]);
257                                         if (it != rscoreFreq[k].end()) {//already have that score
258                                                 rscoreFreq[k][randomData[k]]++;
259                                         }else{//first time we have seen this score
260                                                 rscoreFreq[k][randomData[k]] = 1;
261                                         }
262                                 
263                                         //add randoms score to validscores
264                                         validScores[randomData[k]] = randomData[k];
265                                 }
266                                 
267                                 //report progress
268 //                              m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();  
269                         }
270         
271                         for(int a = 0; a < numComp; a++) {
272                                 float rcumul = 1.0000;
273                                 
274                                 if (random) {
275                                         //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
276                                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
277                                                 //make rscoreFreq map and rCumul
278                                                 map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
279                                                 rCumul[a][it->first] = rcumul;
280                                                 //get percentage of random trees with that info
281                                                 if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
282                                                 else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
283                                         }
284                                         UWScoreSig[a].push_back(rCumul[a][userData[a]]);
285                                 }else           {       UWScoreSig[a].push_back(0.0);                                           }
286         
287                         }
288                         
289                         if (m->control_pressed) { if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0;  }
290                         
291                         //print output files
292                         printUWSummaryFile(i);
293                         if (random)  {  printUnweightedFile();  delete output;  }
294                         if (phylip) {   createPhylipFile(i);            }
295                         
296                         rscoreFreq.clear(); 
297                         rCumul.clear();  
298                         validScores.clear(); 
299                         utreeScores.clear();  
300                         UWScoreSig.clear(); 
301                 }
302                 
303
304                 outSum.close();
305                 
306                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }      return 0; }
307                 
308                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
309                 
310                 //set phylip file as new current phylipfile
311                 string current = "";
312                 itTypes = outputTypes.find("phylip");
313                 if (itTypes != outputTypes.end()) {
314                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
315                 }
316                 
317                 //set column file as new current columnfile
318                 itTypes = outputTypes.find("column");
319                 if (itTypes != outputTypes.end()) {
320                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
321                 }
322                 
323                 m->mothurOutEndLine();
324                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
326                 m->mothurOutEndLine();
327                 
328                 return 0;
329                 
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "UnifracUnweightedCommand", "execute");
333                 exit(1);
334         }
335 }
336 /***********************************************************/
337 void UnifracUnweightedCommand::printUnweightedFile() {
338         try {
339                 vector<double> data;
340                 vector<string> tags;
341                 
342                 tags.push_back("Score");
343                 tags.push_back("RandFreq"); tags.push_back("RandCumul");
344                         
345                 for(int a = 0; a < numComp; a++) {
346                         output->initFile(groupComb[a], tags);
347                         //print each line
348                         for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
349                                 data.push_back(it->first);  data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);                                             
350                                 output->output(data);
351                                 data.clear();
352                         } 
353                         output->resetFile();
354                 }
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "UnifracUnweightedCommand", "printUnweightedFile");
358                 exit(1);
359         }
360 }
361
362 /***********************************************************/
363 void UnifracUnweightedCommand::printUWSummaryFile(int i) {
364         try {
365                                 
366                 //format output
367                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
368                         
369                 //print each line
370
371                 for(int a = 0; a < numComp; a++) {
372                         outSum << i+1 << '\t';
373                         m->mothurOut(toString(i+1) + "\t");
374                         
375                         if (random) {
376                                 if (UWScoreSig[a][0] > (1/(float)iters)) {
377                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl;
378                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << UWScoreSig[a][0] << endl; 
379                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t" + toString(UWScoreSig[a][0])+ "\n"); 
380                                 }else {
381                                         outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl;
382                                         cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << setprecision(itersString.length()) << "<" << (1/float(iters)) << endl; 
383                                         m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t<" + toString((1/float(iters))) + "\n"); 
384                                 }
385                         }else{
386                                 outSum << setprecision(6) << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl;
387                                 cout << setprecision(6)  << groupComb[a]  << '\t' << utreeScores[a][0] << '\t' << "0.00" << endl; 
388                                 m->mothurOutJustToLog(groupComb[a]  + "\t" + toString(utreeScores[a][0])  + "\t0.00\n");
389                         }
390                 }
391                 
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "UnifracUnweightedCommand", "printUWSummaryFile");
395                 exit(1);
396         }
397 }
398 /***********************************************************/
399 void UnifracUnweightedCommand::createPhylipFile(int i) {
400         try {
401                 string phylipFileName;
402                 if ((outputForm == "lt") || (outputForm == "square")) {
403                         phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted.phylip.dist";
404                         outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
405                 }else { //column
406                         phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted.column.dist";
407                         outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
408                 }
409                 
410                 ofstream out;
411                 m->openOutputFile(phylipFileName, out);
412                 
413                 if ((outputForm == "lt") || (outputForm == "square")) {
414                         //output numSeqs
415                         out << globaldata->Groups.size() << endl;
416                 }
417                 
418                 //make matrix with scores in it
419                 vector< vector<float> > dists;  dists.resize(globaldata->Groups.size());
420                 for (int i = 0; i < globaldata->Groups.size(); i++) {
421                         dists[i].resize(globaldata->Groups.size(), 0.0);
422                 }
423                 
424                 //flip it so you can print it
425                 int count = 0;
426                 for (int r=0; r<globaldata->Groups.size(); r++) { 
427                         for (int l = 0; l < r; l++) {
428                                 dists[r][l] = utreeScores[count][0];
429                                 dists[l][r] = utreeScores[count][0];
430                                 count++;
431                         }
432                 }
433                 
434                 //output to file
435                 for (int r=0; r<globaldata->Groups.size(); r++) { 
436                         //output name
437                         string name = globaldata->Groups[r];
438                         if (name.length() < 10) { //pad with spaces to make compatible
439                                 while (name.length() < 10) {  name += " ";  }
440                         }
441                         
442                         if (outputForm == "lt") {
443                                 out << name << '\t';
444                         
445                                 //output distances
446                                 for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
447                                 out << endl;
448                         }else if (outputForm == "square") {
449                                 out << name << '\t';
450                                 
451                                 //output distances
452                                 for (int l = 0; l < globaldata->Groups.size(); l++) {   out << dists[r][l] << '\t';  }
453                                 out << endl;
454                         }else{
455                                 //output distances
456                                 for (int l = 0; l < r; l++) {   
457                                         string otherName = globaldata->Groups[l];
458                                         if (otherName.length() < 10) { //pad with spaces to make compatible
459                                                 while (otherName.length() < 10) {  otherName += " ";  }
460                                         }
461                                         
462                                         out  << name << '\t' << otherName << dists[r][l] << endl;  
463                                 }
464                         }
465                 }
466                 out.close();
467         }
468         catch(exception& e) {
469                 m->errorOut(e, "UnifracUnweightedCommand", "createPhylipFile");
470                 exit(1);
471         }
472 }
473 /***********************************************************/
474
475
476