]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
working on testing
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option)  {
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //if the user changes the output directory command factory will send this info to us in the output parameter 
70                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
71                                 outputDir = ""; 
72                                 outputDir += m->hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it     
73                         }
74
75                         //check for optional parameter and set defaults
76                         // ...at some point should added some additional type checking...
77                         label = validParameter.validFile(parameters, "label", false);                   
78                         if (label == "not found") { label = ""; }
79                         else { 
80                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
81                                 else { allLines = 1;  }
82                         }
83                         
84                         //if the user has not specified any labels use the ones from read.otu
85                         if(label == "") {  
86                                 allLines = globaldata->allLines; 
87                                 labels = globaldata->labels; 
88                         }
89                                 
90                         calc = validParameter.validFile(parameters, "calc", false);                     
91                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
92                         else { 
93                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         }
95                         m->splitAtDash(calc, Estimators);
96                         
97                         groups = validParameter.validFile(parameters, "groups", false);                 
98                         if (groups == "not found") { groups = ""; }
99                         else { 
100                                 m->splitAtDash(groups, Groups);
101                                 globaldata->Groups = Groups;
102                         }
103                         
104                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
105                         all = m->isTrue(temp);
106                         
107                         if (abort == false) {
108                         
109                                 validCalculator = new ValidCalculators();
110                                 int i;
111                                 
112                                 for (i=0; i<Estimators.size(); i++) {
113                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
114                                                 if (Estimators[i] == "sharedsobs") { 
115                                                         sumCalculators.push_back(new SharedSobsCS());
116                                                 }else if (Estimators[i] == "sharedchao") { 
117                                                         sumCalculators.push_back(new SharedChao1());
118                                                 }else if (Estimators[i] == "sharedace") { 
119                                                         sumCalculators.push_back(new SharedAce());
120                                                 }else if (Estimators[i] == "jabund") {  
121                                                         sumCalculators.push_back(new JAbund());
122                                                 }else if (Estimators[i] == "sorabund") { 
123                                                         sumCalculators.push_back(new SorAbund());
124                                                 }else if (Estimators[i] == "jclass") { 
125                                                         sumCalculators.push_back(new Jclass());
126                                                 }else if (Estimators[i] == "sorclass") { 
127                                                         sumCalculators.push_back(new SorClass());
128                                                 }else if (Estimators[i] == "jest") { 
129                                                         sumCalculators.push_back(new Jest());
130                                                 }else if (Estimators[i] == "sorest") { 
131                                                         sumCalculators.push_back(new SorEst());
132                                                 }else if (Estimators[i] == "thetayc") { 
133                                                         sumCalculators.push_back(new ThetaYC());
134                                                 }else if (Estimators[i] == "thetan") { 
135                                                         sumCalculators.push_back(new ThetaN());
136                                                 }else if (Estimators[i] == "kstest") { 
137                                                         sumCalculators.push_back(new KSTest());
138                                                 }else if (Estimators[i] == "sharednseqs") { 
139                                                         sumCalculators.push_back(new SharedNSeqs());
140                                                 }else if (Estimators[i] == "ochiai") { 
141                                                         sumCalculators.push_back(new Ochiai());
142                                                 }else if (Estimators[i] == "anderberg") { 
143                                                         sumCalculators.push_back(new Anderberg());
144                                                 }else if (Estimators[i] == "kulczynski") { 
145                                                         sumCalculators.push_back(new Kulczynski());
146                                                 }else if (Estimators[i] == "kulczynskicody") { 
147                                                         sumCalculators.push_back(new KulczynskiCody());
148                                                 }else if (Estimators[i] == "lennon") { 
149                                                         sumCalculators.push_back(new Lennon());
150                                                 }else if (Estimators[i] == "morisitahorn") { 
151                                                         sumCalculators.push_back(new MorHorn());
152                                                 }else if (Estimators[i] == "braycurtis") { 
153                                                         sumCalculators.push_back(new BrayCurtis());
154                                                 }else if (Estimators[i] == "whittaker") { 
155                                                         sumCalculators.push_back(new Whittaker());
156                                                 }
157                                         }
158                                 }
159                                 
160                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
161                                 m->openOutputFile(outputFileName, outputFileHandle);
162                                 outputNames.push_back(outputFileName);
163                                 
164                                 mult = false;
165                         }
166                 }
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
170                 exit(1);
171         }
172 }
173
174 //**********************************************************************************************************************
175
176 void SummarySharedCommand::help(){
177         try {
178                 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179                 m->mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
180                 m->mothurOut("The summary.shared command should be in the following format: \n");
181                 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182                 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183                 validCalculator->printCalc("sharedsummary", cout);
184                 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187                 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188                 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
190                 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
191         }
192         catch(exception& e) {
193                 m->errorOut(e, "SummarySharedCommand", "help");
194                 exit(1);
195         }
196 }
197
198 //**********************************************************************************************************************
199
200 SummarySharedCommand::~SummarySharedCommand(){
201         if (abort == false) {
202                 delete read;
203                 delete validCalculator;
204         }
205 }
206
207 //**********************************************************************************************************************
208
209 int SummarySharedCommand::execute(){
210         try {
211         
212                 if (abort == true) { return 0; }
213                 
214                 //if the users entered no valid calculators don't execute command
215                 if (sumCalculators.size() == 0) { return 0; }
216                 //check if any calcs can do multiples
217                 else{
218                         if (all){ 
219                                 for (int i = 0; i < sumCalculators.size(); i++) {
220                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
221                                 }
222                         }
223                 }
224                 
225                 //read first line
226                 read = new ReadOTUFile(globaldata->inputFileName);      
227                 read->read(&*globaldata); 
228                         
229                 input = globaldata->ginput;
230                 lookup = input->getSharedRAbundVectors();
231                 string lastLabel = lookup[0]->getLabel();
232                 
233                 //output estimator names as column headers
234                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
235                 for(int i=0;i<sumCalculators.size();i++){
236                         outputFileHandle << '\t' << sumCalculators[i]->getName();
237                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
238                 }
239                 outputFileHandle << endl;
240                 
241                 //create file and put column headers for multiple groups file
242                 if (mult == true) {
243                         outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
244                         m->openOutputFile(outAllFileName, outAll);
245                         outputNames.push_back(outAllFileName);
246                         
247                         outAll << "label" <<'\t' << "comparison" << '\t'; 
248                         for(int i=0;i<sumCalculators.size();i++){
249                                 if (sumCalculators[i]->getMultiple() == true) { 
250                                         outAll << '\t' << sumCalculators[i]->getName();
251                                 }
252                         }
253                         outAll << endl;
254                 }
255                 
256                 if (lookup.size() < 2) { 
257                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
258                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
259                         
260                         //close files and clean up
261                         outputFileHandle.close();  remove(outputFileName.c_str());
262                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
263                         return 0;
264                 //if you only have 2 groups you don't need a .sharedmultiple file
265                 }else if ((lookup.size() == 2) && (mult == true)) { 
266                         mult = false;
267                         outAll.close();  
268                         remove(outAllFileName.c_str());
269                         outputNames.pop_back();
270                 }
271                 
272                 if (m->control_pressed) {
273                         if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
274                         outputFileHandle.close(); remove(outputFileName.c_str()); 
275                         delete input; globaldata->ginput = NULL;
276                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
277                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
278                         globaldata->Groups.clear(); 
279                         return 0;
280                 }
281                                                                 
282                                                                                                                 
283                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
284                 set<string> processedLabels;
285                 set<string> userLabels = labels;
286                         
287                 //as long as you are not at the end of the file or done wih the lines you want
288                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
289                         if (m->control_pressed) {
290                                 if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
291                                 outputFileHandle.close(); remove(outputFileName.c_str()); 
292                                 delete input; globaldata->ginput = NULL;
293                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
294                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
295                                 globaldata->Groups.clear(); 
296                                 return 0;
297                         }
298
299                 
300                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
301                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
302                                 process(lookup);
303                                 
304                                 processedLabels.insert(lookup[0]->getLabel());
305                                 userLabels.erase(lookup[0]->getLabel());
306                         }
307                         
308                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
309                                         string saveLabel = lookup[0]->getLabel();
310                                         
311                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
312                                         lookup = input->getSharedRAbundVectors(lastLabel);
313
314                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
315                                         process(lookup);
316                                         
317                                         processedLabels.insert(lookup[0]->getLabel());
318                                         userLabels.erase(lookup[0]->getLabel());
319                                         
320                                         //restore real lastlabel to save below
321                                         lookup[0]->setLabel(saveLabel);
322                         }
323                         
324                         lastLabel = lookup[0]->getLabel();                      
325                                 
326                         //get next line to process
327                         //prevent memory leak
328                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
329                         lookup = input->getSharedRAbundVectors();
330                 }
331                 
332                 if (m->control_pressed) {
333                         if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
334                         outputFileHandle.close(); remove(outputFileName.c_str()); 
335                         delete input; globaldata->ginput = NULL;
336                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
337                         globaldata->Groups.clear(); 
338                         return 0;
339                 }
340
341                 //output error messages about any remaining user labels
342                 set<string>::iterator it;
343                 bool needToRun = false;
344                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
345                         m->mothurOut("Your file does not include the label " + *it); 
346                         if (processedLabels.count(lastLabel) != 1) {
347                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
348                                 needToRun = true;
349                         }else {
350                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
351                         }
352                 }
353                 
354                 //run last label if you need to
355                 if (needToRun == true)  {
356                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
357                                 lookup = input->getSharedRAbundVectors(lastLabel);
358
359                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
360                                 process(lookup);
361                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
362                 }
363                 
364                                 
365                 //reset groups parameter
366                 globaldata->Groups.clear();  
367                 
368                 //close files
369                 outputFileHandle.close();
370                 if (mult == true) {  outAll.close();  }
371                 
372                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
373                 delete input;  globaldata->ginput = NULL;
374                 
375                 if (m->control_pressed) {
376                         remove(outAllFileName.c_str());  
377                         remove(outputFileName.c_str()); 
378                         return 0;
379                 }
380                 
381                 m->mothurOutEndLine();
382                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
384                 m->mothurOutEndLine();
385
386                 return 0;
387         }
388         catch(exception& e) {
389                 m->errorOut(e, "SummarySharedCommand", "execute");
390                 exit(1);
391         }
392 }
393
394 /***********************************************************/
395 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
396         try {
397                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
398                                 if (mult == true) {
399                                         //output label
400                                         outAll << thisLookup[0]->getLabel() << '\t';
401                                         
402                                         //output groups names
403                                         string outNames = "";
404                                         for (int j = 0; j < thisLookup.size(); j++) {
405                                                 outNames += thisLookup[j]->getGroup() +  "-";
406                                         }
407                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
408                                         outAll << outNames << '\t';
409                                         
410                                         for(int i=0;i<sumCalculators.size();i++){
411                                                 if (sumCalculators[i]->getMultiple() == true) { 
412                                                         sumCalculators[i]->getValues(thisLookup);
413                                                         
414                                                         if (m->control_pressed) { return 1; }
415                                                         
416                                                         outAll << '\t';
417                                                         sumCalculators[i]->print(outAll);
418                                                 }
419                                         }
420                                         outAll << endl;
421                                 }
422         
423                                 int n = 1; 
424                                 vector<SharedRAbundVector*> subset;
425                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to compare
426         
427                                         for (int l = n; l < thisLookup.size(); l++) {
428                                                 
429                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
430                                                 
431                                                 subset.clear(); //clear out old pair of sharedrabunds
432                                                 //add new pair of sharedrabunds
433                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
434                                                 
435                                                 //sort groups to be alphanumeric
436                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
437                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
438                                                 }else{
439                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
440                                                 }
441                                                 
442                                                 for(int i=0;i<sumCalculators.size();i++) {
443
444                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
445                                                         
446                                                         if (m->control_pressed) { return 1; }
447                                                         
448                                                         outputFileHandle << '\t';
449                                                         sumCalculators[i]->print(outputFileHandle);
450                                                 }
451                                                 outputFileHandle << endl;
452                                         }
453                                         n++;
454                                 }
455                         return 0;
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "SummarySharedCommand", "process");
459                 exit(1);
460         }
461 }
462
463 /***********************************************************/