]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option)  {
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //if the user changes the output directory command factory will send this info to us in the output parameter 
70                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
71                                 outputDir = ""; 
72                                 outputDir += m->hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it     
73                         }
74
75                         //check for optional parameter and set defaults
76                         // ...at some point should added some additional type checking...
77                         label = validParameter.validFile(parameters, "label", false);                   
78                         if (label == "not found") { label = ""; }
79                         else { 
80                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
81                                 else { allLines = 1;  }
82                         }
83                         
84                         //if the user has not specified any labels use the ones from read.otu
85                         if(label == "") {  
86                                 allLines = globaldata->allLines; 
87                                 labels = globaldata->labels; 
88                         }
89                                 
90                         calc = validParameter.validFile(parameters, "calc", false);                     
91                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
92                         else { 
93                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         }
95                         m->splitAtDash(calc, Estimators);
96                         
97                         groups = validParameter.validFile(parameters, "groups", false);                 
98                         if (groups == "not found") { groups = ""; }
99                         else { 
100                                 m->splitAtDash(groups, Groups);
101                                 globaldata->Groups = Groups;
102                         }
103                         
104                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
105                         all = m->isTrue(temp);
106                         
107                         if (abort == false) {
108                         
109                                 validCalculator = new ValidCalculators();
110                                 int i;
111                                 
112                                 for (i=0; i<Estimators.size(); i++) {
113                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
114                                                 if (Estimators[i] == "sharedsobs") { 
115                                                         sumCalculators.push_back(new SharedSobsCS());
116                                                 }else if (Estimators[i] == "sharedchao") { 
117                                                         sumCalculators.push_back(new SharedChao1());
118                                                 }else if (Estimators[i] == "sharedace") { 
119                                                         sumCalculators.push_back(new SharedAce());
120                                                 }else if (Estimators[i] == "jabund") {  
121                                                         sumCalculators.push_back(new JAbund());
122                                                 }else if (Estimators[i] == "sorabund") { 
123                                                         sumCalculators.push_back(new SorAbund());
124                                                 }else if (Estimators[i] == "jclass") { 
125                                                         sumCalculators.push_back(new Jclass());
126                                                 }else if (Estimators[i] == "sorclass") { 
127                                                         sumCalculators.push_back(new SorClass());
128                                                 }else if (Estimators[i] == "jest") { 
129                                                         sumCalculators.push_back(new Jest());
130                                                 }else if (Estimators[i] == "sorest") { 
131                                                         sumCalculators.push_back(new SorEst());
132                                                 }else if (Estimators[i] == "thetayc") { 
133                                                         sumCalculators.push_back(new ThetaYC());
134                                                 }else if (Estimators[i] == "thetan") { 
135                                                         sumCalculators.push_back(new ThetaN());
136                                                 }else if (Estimators[i] == "kstest") { 
137                                                         sumCalculators.push_back(new KSTest());
138                                                 }else if (Estimators[i] == "sharednseqs") { 
139                                                         sumCalculators.push_back(new SharedNSeqs());
140                                                 }else if (Estimators[i] == "ochiai") { 
141                                                         sumCalculators.push_back(new Ochiai());
142                                                 }else if (Estimators[i] == "anderberg") { 
143                                                         sumCalculators.push_back(new Anderberg());
144                                                 }else if (Estimators[i] == "kulczynski") { 
145                                                         sumCalculators.push_back(new Kulczynski());
146                                                 }else if (Estimators[i] == "kulczynskicody") { 
147                                                         sumCalculators.push_back(new KulczynskiCody());
148                                                 }else if (Estimators[i] == "lennon") { 
149                                                         sumCalculators.push_back(new Lennon());
150                                                 }else if (Estimators[i] == "morisitahorn") { 
151                                                         sumCalculators.push_back(new MorHorn());
152                                                 }else if (Estimators[i] == "braycurtis") { 
153                                                         sumCalculators.push_back(new BrayCurtis());
154                                                 }else if (Estimators[i] == "whittaker") { 
155                                                         sumCalculators.push_back(new Whittaker());
156                                                 }
157                                         }
158                                 }
159                                 
160                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
161                                 m->openOutputFile(outputFileName, outputFileHandle);
162                                 outputNames.push_back(outputFileName);
163                                 
164                                 mult = false;
165                         }
166                 }
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
170                 exit(1);
171         }
172 }
173
174 //**********************************************************************************************************************
175
176 void SummarySharedCommand::help(){
177         try {
178                 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179                 m->mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
180                 m->mothurOut("The summary.shared command should be in the following format: \n");
181                 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182                 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183                 validCalculator->printCalc("sharedsummary", cout);
184                 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187                 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188                 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
190                 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "SummarySharedCommand", "help");
194                 exit(1);
195         }
196 }
197
198 //**********************************************************************************************************************
199
200 SummarySharedCommand::~SummarySharedCommand(){
201         if (abort == false) {
202                 delete read;
203                 delete validCalculator;
204         }
205 }
206
207 //**********************************************************************************************************************
208
209 int SummarySharedCommand::execute(){
210         try {
211         
212                 if (abort == true) { return 0; }
213                 
214                 //if the users entered no valid calculators don't execute command
215                 if (sumCalculators.size() == 0) { return 0; }
216                 //check if any calcs can do multiples
217                 else{
218                         if (all){ 
219                                 for (int i = 0; i < sumCalculators.size(); i++) {
220                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
221                                 }
222                         }
223                 }
224                 
225                 //read first line
226                 read = new ReadOTUFile(globaldata->inputFileName);      
227                 read->read(&*globaldata); 
228                         
229                 input = globaldata->ginput;
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //output estimator names as column headers
234                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
235                 for(int i=0;i<sumCalculators.size();i++){
236                         outputFileHandle << '\t' << sumCalculators[i]->getName();
237                 }
238                 outputFileHandle << endl;
239                 
240                 //create file and put column headers for multiple groups file
241                 if (mult == true) {
242                         outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
243                         m->openOutputFile(outAllFileName, outAll);
244                         outputNames.push_back(outAllFileName);
245                         
246                         outAll << "label" <<'\t' << "comparison" << '\t'; 
247                         for(int i=0;i<sumCalculators.size();i++){
248                                 if (sumCalculators[i]->getMultiple() == true) { 
249                                         outAll << '\t' << sumCalculators[i]->getName();
250                                 }
251                         }
252                         outAll << endl;
253                 }
254                 
255                 if (lookup.size() < 2) { 
256                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
257                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
258                         
259                         //close files and clean up
260                         outputFileHandle.close();  remove(outputFileName.c_str());
261                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
262                         return 0;
263                 //if you only have 2 groups you don't need a .sharedmultiple file
264                 }else if ((lookup.size() == 2) && (mult == true)) { 
265                         mult = false;
266                         outAll.close();  
267                         remove(outAllFileName.c_str());
268                         outputNames.pop_back();
269                 }
270                 
271                 if (m->control_pressed) {
272                         if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
273                         outputFileHandle.close(); remove(outputFileName.c_str()); 
274                         delete input; globaldata->ginput = NULL;
275                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
276                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
277                         globaldata->Groups.clear(); 
278                         return 0;
279                 }
280                                                                 
281                                                                                                                 
282                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
283                 set<string> processedLabels;
284                 set<string> userLabels = labels;
285                         
286                 //as long as you are not at the end of the file or done wih the lines you want
287                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
288                         if (m->control_pressed) {
289                                 if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
290                                 outputFileHandle.close(); remove(outputFileName.c_str()); 
291                                 delete input; globaldata->ginput = NULL;
292                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
293                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
294                                 globaldata->Groups.clear(); 
295                                 return 0;
296                         }
297
298                 
299                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
300                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
301                                 process(lookup);
302                                 
303                                 processedLabels.insert(lookup[0]->getLabel());
304                                 userLabels.erase(lookup[0]->getLabel());
305                         }
306                         
307                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
308                                         string saveLabel = lookup[0]->getLabel();
309                                         
310                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
311                                         lookup = input->getSharedRAbundVectors(lastLabel);
312
313                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
314                                         process(lookup);
315                                         
316                                         processedLabels.insert(lookup[0]->getLabel());
317                                         userLabels.erase(lookup[0]->getLabel());
318                                         
319                                         //restore real lastlabel to save below
320                                         lookup[0]->setLabel(saveLabel);
321                         }
322                         
323                         lastLabel = lookup[0]->getLabel();                      
324                                 
325                         //get next line to process
326                         //prevent memory leak
327                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
328                         lookup = input->getSharedRAbundVectors();
329                 }
330                 
331                 if (m->control_pressed) {
332                         if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
333                         outputFileHandle.close(); remove(outputFileName.c_str()); 
334                         delete input; globaldata->ginput = NULL;
335                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
336                         globaldata->Groups.clear(); 
337                         return 0;
338                 }
339
340                 //output error messages about any remaining user labels
341                 set<string>::iterator it;
342                 bool needToRun = false;
343                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
344                         m->mothurOut("Your file does not include the label " + *it); 
345                         if (processedLabels.count(lastLabel) != 1) {
346                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
347                                 needToRun = true;
348                         }else {
349                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
350                         }
351                 }
352                 
353                 //run last label if you need to
354                 if (needToRun == true)  {
355                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
356                                 lookup = input->getSharedRAbundVectors(lastLabel);
357
358                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
359                                 process(lookup);
360                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
361                 }
362                 
363                                 
364                 //reset groups parameter
365                 globaldata->Groups.clear();  
366                 
367                 //close files
368                 outputFileHandle.close();
369                 if (mult == true) {  outAll.close();  }
370                 
371                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
372                 delete input;  globaldata->ginput = NULL;
373                 
374                 if (m->control_pressed) {
375                         remove(outAllFileName.c_str());  
376                         remove(outputFileName.c_str()); 
377                         return 0;
378                 }
379                 
380                 m->mothurOutEndLine();
381                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
382                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
383                 m->mothurOutEndLine();
384
385                 return 0;
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "SummarySharedCommand", "execute");
389                 exit(1);
390         }
391 }
392
393 /***********************************************************/
394 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
395         try {
396                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
397                                 if (mult == true) {
398                                         //output label
399                                         outAll << thisLookup[0]->getLabel() << '\t';
400                                         
401                                         //output groups names
402                                         string outNames = "";
403                                         for (int j = 0; j < thisLookup.size(); j++) {
404                                                 outNames += thisLookup[j]->getGroup() +  "-";
405                                         }
406                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
407                                         outAll << outNames << '\t';
408                                         
409                                         for(int i=0;i<sumCalculators.size();i++){
410                                                 if (sumCalculators[i]->getMultiple() == true) { 
411                                                         sumCalculators[i]->getValues(thisLookup);
412                                                         
413                                                         if (m->control_pressed) { return 1; }
414                                                         
415                                                         outAll << '\t';
416                                                         sumCalculators[i]->print(outAll);
417                                                 }
418                                         }
419                                         outAll << endl;
420                                 }
421         
422                                 int n = 1; 
423                                 vector<SharedRAbundVector*> subset;
424                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
425                                         for (int l = n; l < thisLookup.size(); l++) {
426                                                 
427                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
428                                                 
429                                                 subset.clear(); //clear out old pair of sharedrabunds
430                                                 //add new pair of sharedrabunds
431                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
432                                                 
433                                                 //sort groups to be alphanumeric
434                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
435                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
436                                                 }else{
437                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
438                                                 }
439                                                 
440                                                 for(int i=0;i<sumCalculators.size();i++) {
441
442                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
443                                                         
444                                                         if (m->control_pressed) { return 1; }
445                                                         
446                                                         outputFileHandle << '\t';
447                                                         sumCalculators[i]->print(outputFileHandle);
448                                                 }
449                                                 outputFileHandle << endl;
450                                         }
451                                         n++;
452                                 }
453                         return 0;
454         }
455         catch(exception& e) {
456                 m->errorOut(e, "SummarySharedCommand", "process");
457                 exit(1);
458         }
459 }
460
461 /***********************************************************/