]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
finished work on classify.seqs bayesian method and various bug fixes
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option){
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //check for optional parameter and set defaults
70                         // ...at some point should added some additional type checking...
71                         label = validParameter.validFile(parameters, "label", false);                   
72                         if (label == "not found") { label = ""; }
73                         else { 
74                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
75                                 else { allLines = 1;  }
76                         }
77                         
78                         //if the user has not specified any labels use the ones from read.otu
79                         if(label == "") {  
80                                 allLines = globaldata->allLines; 
81                                 labels = globaldata->labels; 
82                         }
83                                 
84                         calc = validParameter.validFile(parameters, "calc", false);                     
85                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
86                         else { 
87                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
88                         }
89                         splitAtDash(calc, Estimators);
90                         
91                         groups = validParameter.validFile(parameters, "groups", false);                 
92                         if (groups == "not found") { groups = ""; }
93                         else { 
94                                 splitAtDash(groups, Groups);
95                                 globaldata->Groups = Groups;
96                         }
97                         
98                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "true"; }
99                         all = isTrue(temp);
100                         
101                         if (abort == false) {
102                         
103                                 validCalculator = new ValidCalculators();
104                                 int i;
105                                 
106                                 for (i=0; i<Estimators.size(); i++) {
107                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
108                                                 if (Estimators[i] == "sharedsobs") { 
109                                                         sumCalculators.push_back(new SharedSobsCS());
110                                                 }else if (Estimators[i] == "sharedchao") { 
111                                                         sumCalculators.push_back(new SharedChao1());
112                                                 }else if (Estimators[i] == "sharedace") { 
113                                                         sumCalculators.push_back(new SharedAce());
114                                                 }else if (Estimators[i] == "jabund") {  
115                                                         sumCalculators.push_back(new JAbund());
116                                                 }else if (Estimators[i] == "sorabund") { 
117                                                         sumCalculators.push_back(new SorAbund());
118                                                 }else if (Estimators[i] == "jclass") { 
119                                                         sumCalculators.push_back(new Jclass());
120                                                 }else if (Estimators[i] == "sorclass") { 
121                                                         sumCalculators.push_back(new SorClass());
122                                                 }else if (Estimators[i] == "jest") { 
123                                                         sumCalculators.push_back(new Jest());
124                                                 }else if (Estimators[i] == "sorest") { 
125                                                         sumCalculators.push_back(new SorEst());
126                                                 }else if (Estimators[i] == "thetayc") { 
127                                                         sumCalculators.push_back(new ThetaYC());
128                                                 }else if (Estimators[i] == "thetan") { 
129                                                         sumCalculators.push_back(new ThetaN());
130                                                 }else if (Estimators[i] == "kstest") { 
131                                                         sumCalculators.push_back(new KSTest());
132                                                 }else if (Estimators[i] == "sharednseqs") { 
133                                                         sumCalculators.push_back(new SharedNSeqs());
134                                                 }else if (Estimators[i] == "ochiai") { 
135                                                         sumCalculators.push_back(new Ochiai());
136                                                 }else if (Estimators[i] == "anderberg") { 
137                                                         sumCalculators.push_back(new Anderberg());
138                                                 }else if (Estimators[i] == "kulczynski") { 
139                                                         sumCalculators.push_back(new Kulczynski());
140                                                 }else if (Estimators[i] == "kulczynskicody") { 
141                                                         sumCalculators.push_back(new KulczynskiCody());
142                                                 }else if (Estimators[i] == "lennon") { 
143                                                         sumCalculators.push_back(new Lennon());
144                                                 }else if (Estimators[i] == "morisitahorn") { 
145                                                         sumCalculators.push_back(new MorHorn());
146                                                 }else if (Estimators[i] == "braycurtis") { 
147                                                         sumCalculators.push_back(new BrayCurtis());
148                                                 }else if (Estimators[i] == "whittaker") { 
149                                                         sumCalculators.push_back(new Whittaker());
150                                                 }
151                                         }
152                                 }
153                                 
154                                 outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
155                                 openOutputFile(outputFileName, outputFileHandle);
156                                 mult = false;
157                         }
158                 }
159         }
160         catch(exception& e) {
161                 errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
162                 exit(1);
163         }
164 }
165
166 //**********************************************************************************************************************
167
168 void SummarySharedCommand::help(){
169         try {
170                 mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
171                 mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
172                 mothurOut("The summary.shared command should be in the following format: \n");
173                 mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
174                 mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
175                 validCalculator->printCalc("sharedsummary", cout);
176                 mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
177                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
178                 mothurOut("The label parameter is used to analyze specific labels in your input.\n");
179                 mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is true.\n");
180                 mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
181                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
182                 mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
183         }
184         catch(exception& e) {
185                 errorOut(e, "SummarySharedCommand", "help");
186                 exit(1);
187         }
188 }
189
190 //**********************************************************************************************************************
191
192 SummarySharedCommand::~SummarySharedCommand(){
193         if (abort == false) {
194                 delete read;
195                 delete validCalculator;
196         }
197 }
198
199 //**********************************************************************************************************************
200
201 int SummarySharedCommand::execute(){
202         try {
203         
204                 if (abort == true) { return 0; }
205         
206                 //if the users entered no valid calculators don't execute command
207                 if (sumCalculators.size() == 0) { return 0; }
208                 //check if any calcs can do multiples
209                 else{
210                         if (all){ 
211                                 for (int i = 0; i < sumCalculators.size(); i++) {
212                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
213                                 }
214                         }
215                 }
216                 
217                 //read first line
218                 read = new ReadOTUFile(globaldata->inputFileName);      
219                 read->read(&*globaldata); 
220                         
221                 input = globaldata->ginput;
222                 lookup = input->getSharedRAbundVectors();
223                 string lastLabel = lookup[0]->getLabel();
224                 
225                 //output estimator names as column headers
226                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
227                 for(int i=0;i<sumCalculators.size();i++){
228                         outputFileHandle << '\t' << sumCalculators[i]->getName();
229                 }
230                 outputFileHandle << endl;
231                 
232                 //create file and put column headers for multiple groups file
233                 if (mult == true) {
234                         outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
235                         openOutputFile(outAllFileName, outAll);
236                         
237                         outAll << "label" <<'\t' << "comparison" << '\t'; 
238                         for(int i=0;i<sumCalculators.size();i++){
239                                 if (sumCalculators[i]->getMultiple() == true) { 
240                                         outAll << '\t' << sumCalculators[i]->getName();
241                                 }
242                         }
243                         outAll << endl;
244                 }
245                 
246                 if (lookup.size() < 2) { 
247                         mothurOut("I cannot run the command without at least 2 valid groups."); 
248                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
249                         
250                         //close files and clean up
251                         outputFileHandle.close();  remove(outputFileName.c_str());
252                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
253                         return 0;
254                 //if you only have 2 groups you don't need a .sharedmultiple file
255                 }else if ((lookup.size() == 2) && (mult == true)) { 
256                         mult = false;
257                         outAll.close();  
258                         remove(outAllFileName.c_str());
259                 }
260                                         
261                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
262                 set<string> processedLabels;
263                 set<string> userLabels = labels;
264                         
265                 //as long as you are not at the end of the file or done wih the lines you want
266                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
267                 
268                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
269                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
270                                 process(lookup);
271                                 
272                                 processedLabels.insert(lookup[0]->getLabel());
273                                 userLabels.erase(lookup[0]->getLabel());
274                         }
275                         
276                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
277                                         string saveLabel = lookup[0]->getLabel();
278                                         
279                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
280                                         lookup = input->getSharedRAbundVectors(lastLabel);
281
282                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
283                                         process(lookup);
284                                         
285                                         processedLabels.insert(lookup[0]->getLabel());
286                                         userLabels.erase(lookup[0]->getLabel());
287                                         
288                                         //restore real lastlabel to save below
289                                         lookup[0]->setLabel(saveLabel);
290                         }
291                         
292                         lastLabel = lookup[0]->getLabel();                      
293                                 
294                         //get next line to process
295                         //prevent memory leak
296                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
297                         lookup = input->getSharedRAbundVectors();
298                 }
299                 
300                 //output error messages about any remaining user labels
301                 set<string>::iterator it;
302                 bool needToRun = false;
303                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
304                         mothurOut("Your file does not include the label " + *it); 
305                         if (processedLabels.count(lastLabel) != 1) {
306                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
307                                 needToRun = true;
308                         }else {
309                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
310                         }
311                 }
312                 
313                 //run last label if you need to
314                 if (needToRun == true)  {
315                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
316                                 lookup = input->getSharedRAbundVectors(lastLabel);
317
318                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
319                                 process(lookup);
320                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
321                 }
322                 
323
324                 //reset groups parameter
325                 globaldata->Groups.clear();  
326                 
327                 //close files
328                 outputFileHandle.close();
329                 if (mult == true) {  outAll.close();  }
330                 
331                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
332                 
333                 delete input;  globaldata->ginput = NULL;
334
335                 return 0;
336         }
337         catch(exception& e) {
338                 errorOut(e, "SummarySharedCommand", "execute");
339                 exit(1);
340         }
341 }
342
343 /***********************************************************/
344 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
345         try {
346                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
347                                 if (mult == true) {
348                                         //output label
349                                         outAll << thisLookup[0]->getLabel() << '\t';
350                                         
351                                         //output groups names
352                                         string outNames = "";
353                                         for (int j = 0; j < thisLookup.size(); j++) {
354                                                 outNames += thisLookup[j]->getGroup() +  "-";
355                                         }
356                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
357                                         outAll << outNames << '\t';
358                                         
359                                         for(int i=0;i<sumCalculators.size();i++){
360                                                 if (sumCalculators[i]->getMultiple() == true) { 
361                                                         sumCalculators[i]->getValues(thisLookup);
362                                                         outAll << '\t';
363                                                         sumCalculators[i]->print(outAll);
364                                                 }
365                                         }
366                                         outAll << endl;
367                                 }
368         
369                                 int n = 1; 
370                                 vector<SharedRAbundVector*> subset;
371                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
372                                         for (int l = n; l < thisLookup.size(); l++) {
373                                                 
374                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
375                                                 
376                                                 subset.clear(); //clear out old pair of sharedrabunds
377                                                 //add new pair of sharedrabunds
378                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
379                                                 
380                                                 //sort groups to be alphanumeric
381                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
382                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
383                                                 }else{
384                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
385                                                 }
386                                                 
387                                                 for(int i=0;i<sumCalculators.size();i++) {
388
389                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
390                                                         outputFileHandle << '\t';
391                                                         sumCalculators[i]->print(outputFileHandle);
392                                                 }
393                                                 outputFileHandle << endl;
394                                         }
395                                         n++;
396                                 }
397
398         }
399         catch(exception& e) {
400                 errorOut(e, "SummarySharedCommand", "process");
401                 exit(1);
402         }
403 }
404
405 /***********************************************************/