]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
created mothurOut class to handle logfiles
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option)  {
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //if the user changes the output directory command factory will send this info to us in the output parameter 
70                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
71                                 outputDir = ""; 
72                                 outputDir += hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it        
73                         }
74
75                         //check for optional parameter and set defaults
76                         // ...at some point should added some additional type checking...
77                         label = validParameter.validFile(parameters, "label", false);                   
78                         if (label == "not found") { label = ""; }
79                         else { 
80                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
81                                 else { allLines = 1;  }
82                         }
83                         
84                         //if the user has not specified any labels use the ones from read.otu
85                         if(label == "") {  
86                                 allLines = globaldata->allLines; 
87                                 labels = globaldata->labels; 
88                         }
89                                 
90                         calc = validParameter.validFile(parameters, "calc", false);                     
91                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
92                         else { 
93                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         }
95                         splitAtDash(calc, Estimators);
96                         
97                         groups = validParameter.validFile(parameters, "groups", false);                 
98                         if (groups == "not found") { groups = ""; }
99                         else { 
100                                 splitAtDash(groups, Groups);
101                                 globaldata->Groups = Groups;
102                         }
103                         
104                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
105                         all = isTrue(temp);
106                         
107                         if (abort == false) {
108                         
109                                 validCalculator = new ValidCalculators();
110                                 int i;
111                                 
112                                 for (i=0; i<Estimators.size(); i++) {
113                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
114                                                 if (Estimators[i] == "sharedsobs") { 
115                                                         sumCalculators.push_back(new SharedSobsCS());
116                                                 }else if (Estimators[i] == "sharedchao") { 
117                                                         sumCalculators.push_back(new SharedChao1());
118                                                 }else if (Estimators[i] == "sharedace") { 
119                                                         sumCalculators.push_back(new SharedAce());
120                                                 }else if (Estimators[i] == "jabund") {  
121                                                         sumCalculators.push_back(new JAbund());
122                                                 }else if (Estimators[i] == "sorabund") { 
123                                                         sumCalculators.push_back(new SorAbund());
124                                                 }else if (Estimators[i] == "jclass") { 
125                                                         sumCalculators.push_back(new Jclass());
126                                                 }else if (Estimators[i] == "sorclass") { 
127                                                         sumCalculators.push_back(new SorClass());
128                                                 }else if (Estimators[i] == "jest") { 
129                                                         sumCalculators.push_back(new Jest());
130                                                 }else if (Estimators[i] == "sorest") { 
131                                                         sumCalculators.push_back(new SorEst());
132                                                 }else if (Estimators[i] == "thetayc") { 
133                                                         sumCalculators.push_back(new ThetaYC());
134                                                 }else if (Estimators[i] == "thetan") { 
135                                                         sumCalculators.push_back(new ThetaN());
136                                                 }else if (Estimators[i] == "kstest") { 
137                                                         sumCalculators.push_back(new KSTest());
138                                                 }else if (Estimators[i] == "sharednseqs") { 
139                                                         sumCalculators.push_back(new SharedNSeqs());
140                                                 }else if (Estimators[i] == "ochiai") { 
141                                                         sumCalculators.push_back(new Ochiai());
142                                                 }else if (Estimators[i] == "anderberg") { 
143                                                         sumCalculators.push_back(new Anderberg());
144                                                 }else if (Estimators[i] == "kulczynski") { 
145                                                         sumCalculators.push_back(new Kulczynski());
146                                                 }else if (Estimators[i] == "kulczynskicody") { 
147                                                         sumCalculators.push_back(new KulczynskiCody());
148                                                 }else if (Estimators[i] == "lennon") { 
149                                                         sumCalculators.push_back(new Lennon());
150                                                 }else if (Estimators[i] == "morisitahorn") { 
151                                                         sumCalculators.push_back(new MorHorn());
152                                                 }else if (Estimators[i] == "braycurtis") { 
153                                                         sumCalculators.push_back(new BrayCurtis());
154                                                 }else if (Estimators[i] == "whittaker") { 
155                                                         sumCalculators.push_back(new Whittaker());
156                                                 }
157                                         }
158                                 }
159                                 
160                                 outputFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "shared.summary";
161                                 openOutputFile(outputFileName, outputFileHandle);
162                                 outputNames.push_back(outputFileName);
163                                 
164                                 mult = false;
165                         }
166                 }
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
170                 exit(1);
171         }
172 }
173
174 //**********************************************************************************************************************
175
176 void SummarySharedCommand::help(){
177         try {
178                 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179                 m->mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
180                 m->mothurOut("The summary.shared command should be in the following format: \n");
181                 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182                 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183                 validCalculator->printCalc("sharedsummary", cout);
184                 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187                 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188                 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
190                 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "SummarySharedCommand", "help");
194                 exit(1);
195         }
196 }
197
198 //**********************************************************************************************************************
199
200 SummarySharedCommand::~SummarySharedCommand(){
201         if (abort == false) {
202                 delete read;
203                 delete validCalculator;
204         }
205 }
206
207 //**********************************************************************************************************************
208
209 int SummarySharedCommand::execute(){
210         try {
211         
212                 if (abort == true) { return 0; }
213                 
214                 //if the users entered no valid calculators don't execute command
215                 if (sumCalculators.size() == 0) { return 0; }
216                 //check if any calcs can do multiples
217                 else{
218                         if (all){ 
219                                 for (int i = 0; i < sumCalculators.size(); i++) {
220                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
221                                 }
222                         }
223                 }
224                 
225                 //read first line
226                 read = new ReadOTUFile(globaldata->inputFileName);      
227                 read->read(&*globaldata); 
228                         
229                 input = globaldata->ginput;
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //output estimator names as column headers
234                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
235                 for(int i=0;i<sumCalculators.size();i++){
236                         outputFileHandle << '\t' << sumCalculators[i]->getName();
237                 }
238                 outputFileHandle << endl;
239                 
240                 //create file and put column headers for multiple groups file
241                 if (mult == true) {
242                         outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
243                         openOutputFile(outAllFileName, outAll);
244                         outputNames.push_back(outAllFileName);
245                         
246                         outAll << "label" <<'\t' << "comparison" << '\t'; 
247                         for(int i=0;i<sumCalculators.size();i++){
248                                 if (sumCalculators[i]->getMultiple() == true) { 
249                                         outAll << '\t' << sumCalculators[i]->getName();
250                                 }
251                         }
252                         outAll << endl;
253                 }
254                 
255                 if (lookup.size() < 2) { 
256                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
257                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
258                         
259                         //close files and clean up
260                         outputFileHandle.close();  remove(outputFileName.c_str());
261                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
262                         return 0;
263                 //if you only have 2 groups you don't need a .sharedmultiple file
264                 }else if ((lookup.size() == 2) && (mult == true)) { 
265                         mult = false;
266                         outAll.close();  
267                         remove(outAllFileName.c_str());
268                         outputNames.pop_back();
269                 }
270                                         
271                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
272                 set<string> processedLabels;
273                 set<string> userLabels = labels;
274                         
275                 //as long as you are not at the end of the file or done wih the lines you want
276                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
277                 
278                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
279                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
280                                 process(lookup);
281                                 
282                                 processedLabels.insert(lookup[0]->getLabel());
283                                 userLabels.erase(lookup[0]->getLabel());
284                         }
285                         
286                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
287                                         string saveLabel = lookup[0]->getLabel();
288                                         
289                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
290                                         lookup = input->getSharedRAbundVectors(lastLabel);
291
292                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
293                                         process(lookup);
294                                         
295                                         processedLabels.insert(lookup[0]->getLabel());
296                                         userLabels.erase(lookup[0]->getLabel());
297                                         
298                                         //restore real lastlabel to save below
299                                         lookup[0]->setLabel(saveLabel);
300                         }
301                         
302                         lastLabel = lookup[0]->getLabel();                      
303                                 
304                         //get next line to process
305                         //prevent memory leak
306                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
307                         lookup = input->getSharedRAbundVectors();
308                 }
309                 
310                 //output error messages about any remaining user labels
311                 set<string>::iterator it;
312                 bool needToRun = false;
313                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
314                         m->mothurOut("Your file does not include the label " + *it); 
315                         if (processedLabels.count(lastLabel) != 1) {
316                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
317                                 needToRun = true;
318                         }else {
319                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
320                         }
321                 }
322                 
323                 //run last label if you need to
324                 if (needToRun == true)  {
325                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
326                                 lookup = input->getSharedRAbundVectors(lastLabel);
327
328                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
329                                 process(lookup);
330                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
331                 }
332                 
333
334                 //reset groups parameter
335                 globaldata->Groups.clear();  
336                 
337                 //close files
338                 outputFileHandle.close();
339                 if (mult == true) {  outAll.close();  }
340                 
341                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
342                 
343                 delete input;  globaldata->ginput = NULL;
344                 
345                 m->mothurOutEndLine();
346                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
347                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
348                 m->mothurOutEndLine();
349
350                 return 0;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "SummarySharedCommand", "execute");
354                 exit(1);
355         }
356 }
357
358 /***********************************************************/
359 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
360         try {
361                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
362                                 if (mult == true) {
363                                         //output label
364                                         outAll << thisLookup[0]->getLabel() << '\t';
365                                         
366                                         //output groups names
367                                         string outNames = "";
368                                         for (int j = 0; j < thisLookup.size(); j++) {
369                                                 outNames += thisLookup[j]->getGroup() +  "-";
370                                         }
371                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
372                                         outAll << outNames << '\t';
373                                         
374                                         for(int i=0;i<sumCalculators.size();i++){
375                                                 if (sumCalculators[i]->getMultiple() == true) { 
376                                                         sumCalculators[i]->getValues(thisLookup);
377                                                         outAll << '\t';
378                                                         sumCalculators[i]->print(outAll);
379                                                 }
380                                         }
381                                         outAll << endl;
382                                 }
383         
384                                 int n = 1; 
385                                 vector<SharedRAbundVector*> subset;
386                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
387                                         for (int l = n; l < thisLookup.size(); l++) {
388                                                 
389                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
390                                                 
391                                                 subset.clear(); //clear out old pair of sharedrabunds
392                                                 //add new pair of sharedrabunds
393                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
394                                                 
395                                                 //sort groups to be alphanumeric
396                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
397                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
398                                                 }else{
399                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
400                                                 }
401                                                 
402                                                 for(int i=0;i<sumCalculators.size();i++) {
403
404                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
405                                                         outputFileHandle << '\t';
406                                                         sumCalculators[i]->print(outputFileHandle);
407                                                 }
408                                                 outputFileHandle << endl;
409                                         }
410                                         n++;
411                                 }
412
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "SummarySharedCommand", "process");
416                 exit(1);
417         }
418 }
419
420 /***********************************************************/