]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
added set.dir command and modified commands to redirect input and output, removed...
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36 //**********************************************************************************************************************
37
38 SummarySharedCommand::SummarySharedCommand(string option){
39         try {
40                 globaldata = GlobalData::getInstance();
41                 abort = false;
42                 allLines = 1;
43                 labels.clear();
44                 Estimators.clear();
45                 
46                 //allow user to run help
47                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
48                 
49                 else {
50                         //valid paramters for this command
51                         string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
52                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                         
54                         OptionParser parser(option);
55                         map<string, string> parameters = parser.getParameters();
56                         
57                         ValidParameters validParameter;
58                 
59                         //check to make sure all parameters are valid for command
60                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
61                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
62                         }
63                         
64                         //make sure the user has already run the read.otu command
65                         if (globaldata->getSharedFile() == "") {
66                                  mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); mothurOutEndLine(); abort = true; 
67                         }
68                         
69                         //if the user changes the output directory command factory will send this info to us in the output parameter 
70                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
71                                 outputDir = ""; 
72                                 outputDir += hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it        
73                         }
74
75                         //check for optional parameter and set defaults
76                         // ...at some point should added some additional type checking...
77                         label = validParameter.validFile(parameters, "label", false);                   
78                         if (label == "not found") { label = ""; }
79                         else { 
80                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
81                                 else { allLines = 1;  }
82                         }
83                         
84                         //if the user has not specified any labels use the ones from read.otu
85                         if(label == "") {  
86                                 allLines = globaldata->allLines; 
87                                 labels = globaldata->labels; 
88                         }
89                                 
90                         calc = validParameter.validFile(parameters, "calc", false);                     
91                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
92                         else { 
93                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         }
95                         splitAtDash(calc, Estimators);
96                         
97                         groups = validParameter.validFile(parameters, "groups", false);                 
98                         if (groups == "not found") { groups = ""; }
99                         else { 
100                                 splitAtDash(groups, Groups);
101                                 globaldata->Groups = Groups;
102                         }
103                         
104                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
105                         all = isTrue(temp);
106                         
107                         if (abort == false) {
108                         
109                                 validCalculator = new ValidCalculators();
110                                 int i;
111                                 
112                                 for (i=0; i<Estimators.size(); i++) {
113                                         if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) { 
114                                                 if (Estimators[i] == "sharedsobs") { 
115                                                         sumCalculators.push_back(new SharedSobsCS());
116                                                 }else if (Estimators[i] == "sharedchao") { 
117                                                         sumCalculators.push_back(new SharedChao1());
118                                                 }else if (Estimators[i] == "sharedace") { 
119                                                         sumCalculators.push_back(new SharedAce());
120                                                 }else if (Estimators[i] == "jabund") {  
121                                                         sumCalculators.push_back(new JAbund());
122                                                 }else if (Estimators[i] == "sorabund") { 
123                                                         sumCalculators.push_back(new SorAbund());
124                                                 }else if (Estimators[i] == "jclass") { 
125                                                         sumCalculators.push_back(new Jclass());
126                                                 }else if (Estimators[i] == "sorclass") { 
127                                                         sumCalculators.push_back(new SorClass());
128                                                 }else if (Estimators[i] == "jest") { 
129                                                         sumCalculators.push_back(new Jest());
130                                                 }else if (Estimators[i] == "sorest") { 
131                                                         sumCalculators.push_back(new SorEst());
132                                                 }else if (Estimators[i] == "thetayc") { 
133                                                         sumCalculators.push_back(new ThetaYC());
134                                                 }else if (Estimators[i] == "thetan") { 
135                                                         sumCalculators.push_back(new ThetaN());
136                                                 }else if (Estimators[i] == "kstest") { 
137                                                         sumCalculators.push_back(new KSTest());
138                                                 }else if (Estimators[i] == "sharednseqs") { 
139                                                         sumCalculators.push_back(new SharedNSeqs());
140                                                 }else if (Estimators[i] == "ochiai") { 
141                                                         sumCalculators.push_back(new Ochiai());
142                                                 }else if (Estimators[i] == "anderberg") { 
143                                                         sumCalculators.push_back(new Anderberg());
144                                                 }else if (Estimators[i] == "kulczynski") { 
145                                                         sumCalculators.push_back(new Kulczynski());
146                                                 }else if (Estimators[i] == "kulczynskicody") { 
147                                                         sumCalculators.push_back(new KulczynskiCody());
148                                                 }else if (Estimators[i] == "lennon") { 
149                                                         sumCalculators.push_back(new Lennon());
150                                                 }else if (Estimators[i] == "morisitahorn") { 
151                                                         sumCalculators.push_back(new MorHorn());
152                                                 }else if (Estimators[i] == "braycurtis") { 
153                                                         sumCalculators.push_back(new BrayCurtis());
154                                                 }else if (Estimators[i] == "whittaker") { 
155                                                         sumCalculators.push_back(new Whittaker());
156                                                 }
157                                         }
158                                 }
159                                 
160                                 outputFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "shared.summary";
161                                 openOutputFile(outputFileName, outputFileHandle);
162                                 mult = false;
163                         }
164                 }
165         }
166         catch(exception& e) {
167                 errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
168                 exit(1);
169         }
170 }
171
172 //**********************************************************************************************************************
173
174 void SummarySharedCommand::help(){
175         try {
176                 mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
177                 mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
178                 mothurOut("The summary.shared command should be in the following format: \n");
179                 mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
180                 mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
181                 validCalculator->printCalc("sharedsummary", cout);
182                 mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
183                 mothurOut("The default value for groups is all the groups in your groupfile.\n");
184                 mothurOut("The label parameter is used to analyze specific labels in your input.\n");
185                 mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
186                 mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
187                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
188                 mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
189         }
190         catch(exception& e) {
191                 errorOut(e, "SummarySharedCommand", "help");
192                 exit(1);
193         }
194 }
195
196 //**********************************************************************************************************************
197
198 SummarySharedCommand::~SummarySharedCommand(){
199         if (abort == false) {
200                 delete read;
201                 delete validCalculator;
202         }
203 }
204
205 //**********************************************************************************************************************
206
207 int SummarySharedCommand::execute(){
208         try {
209         
210                 if (abort == true) { return 0; }
211         
212                 //if the users entered no valid calculators don't execute command
213                 if (sumCalculators.size() == 0) { return 0; }
214                 //check if any calcs can do multiples
215                 else{
216                         if (all){ 
217                                 for (int i = 0; i < sumCalculators.size(); i++) {
218                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
219                                 }
220                         }
221                 }
222                 
223                 //read first line
224                 read = new ReadOTUFile(globaldata->inputFileName);      
225                 read->read(&*globaldata); 
226                         
227                 input = globaldata->ginput;
228                 lookup = input->getSharedRAbundVectors();
229                 string lastLabel = lookup[0]->getLabel();
230                 
231                 //output estimator names as column headers
232                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
233                 for(int i=0;i<sumCalculators.size();i++){
234                         outputFileHandle << '\t' << sumCalculators[i]->getName();
235                 }
236                 outputFileHandle << endl;
237                 
238                 //create file and put column headers for multiple groups file
239                 if (mult == true) {
240                         outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
241                         openOutputFile(outAllFileName, outAll);
242                         
243                         outAll << "label" <<'\t' << "comparison" << '\t'; 
244                         for(int i=0;i<sumCalculators.size();i++){
245                                 if (sumCalculators[i]->getMultiple() == true) { 
246                                         outAll << '\t' << sumCalculators[i]->getName();
247                                 }
248                         }
249                         outAll << endl;
250                 }
251                 
252                 if (lookup.size() < 2) { 
253                         mothurOut("I cannot run the command without at least 2 valid groups."); 
254                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
255                         
256                         //close files and clean up
257                         outputFileHandle.close();  remove(outputFileName.c_str());
258                         if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
259                         return 0;
260                 //if you only have 2 groups you don't need a .sharedmultiple file
261                 }else if ((lookup.size() == 2) && (mult == true)) { 
262                         mult = false;
263                         outAll.close();  
264                         remove(outAllFileName.c_str());
265                 }
266                                         
267                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
268                 set<string> processedLabels;
269                 set<string> userLabels = labels;
270                         
271                 //as long as you are not at the end of the file or done wih the lines you want
272                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
273                 
274                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
275                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
276                                 process(lookup);
277                                 
278                                 processedLabels.insert(lookup[0]->getLabel());
279                                 userLabels.erase(lookup[0]->getLabel());
280                         }
281                         
282                         if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
283                                         string saveLabel = lookup[0]->getLabel();
284                                         
285                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
286                                         lookup = input->getSharedRAbundVectors(lastLabel);
287
288                                         mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
289                                         process(lookup);
290                                         
291                                         processedLabels.insert(lookup[0]->getLabel());
292                                         userLabels.erase(lookup[0]->getLabel());
293                                         
294                                         //restore real lastlabel to save below
295                                         lookup[0]->setLabel(saveLabel);
296                         }
297                         
298                         lastLabel = lookup[0]->getLabel();                      
299                                 
300                         //get next line to process
301                         //prevent memory leak
302                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
303                         lookup = input->getSharedRAbundVectors();
304                 }
305                 
306                 //output error messages about any remaining user labels
307                 set<string>::iterator it;
308                 bool needToRun = false;
309                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
310                         mothurOut("Your file does not include the label " + *it); 
311                         if (processedLabels.count(lastLabel) != 1) {
312                                 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
313                                 needToRun = true;
314                         }else {
315                                 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
316                         }
317                 }
318                 
319                 //run last label if you need to
320                 if (needToRun == true)  {
321                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
322                                 lookup = input->getSharedRAbundVectors(lastLabel);
323
324                                 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
325                                 process(lookup);
326                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
327                 }
328                 
329
330                 //reset groups parameter
331                 globaldata->Groups.clear();  
332                 
333                 //close files
334                 outputFileHandle.close();
335                 if (mult == true) {  outAll.close();  }
336                 
337                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
338                 
339                 delete input;  globaldata->ginput = NULL;
340
341                 return 0;
342         }
343         catch(exception& e) {
344                 errorOut(e, "SummarySharedCommand", "execute");
345                 exit(1);
346         }
347 }
348
349 /***********************************************************/
350 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
351         try {
352                                 //loop through calculators and add to file all for all calcs that can do mutiple groups
353                                 if (mult == true) {
354                                         //output label
355                                         outAll << thisLookup[0]->getLabel() << '\t';
356                                         
357                                         //output groups names
358                                         string outNames = "";
359                                         for (int j = 0; j < thisLookup.size(); j++) {
360                                                 outNames += thisLookup[j]->getGroup() +  "-";
361                                         }
362                                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
363                                         outAll << outNames << '\t';
364                                         
365                                         for(int i=0;i<sumCalculators.size();i++){
366                                                 if (sumCalculators[i]->getMultiple() == true) { 
367                                                         sumCalculators[i]->getValues(thisLookup);
368                                                         outAll << '\t';
369                                                         sumCalculators[i]->print(outAll);
370                                                 }
371                                         }
372                                         outAll << endl;
373                                 }
374         
375                                 int n = 1; 
376                                 vector<SharedRAbundVector*> subset;
377                                 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
378                                         for (int l = n; l < thisLookup.size(); l++) {
379                                                 
380                                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
381                                                 
382                                                 subset.clear(); //clear out old pair of sharedrabunds
383                                                 //add new pair of sharedrabunds
384                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
385                                                 
386                                                 //sort groups to be alphanumeric
387                                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
388                                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
389                                                 }else{
390                                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
391                                                 }
392                                                 
393                                                 for(int i=0;i<sumCalculators.size();i++) {
394
395                                                         sumCalculators[i]->getValues(subset); //saves the calculator outputs
396                                                         outputFileHandle << '\t';
397                                                         sumCalculators[i]->print(outputFileHandle);
398                                                 }
399                                                 outputFileHandle << endl;
400                                         }
401                                         n++;
402                                 }
403
404         }
405         catch(exception& e) {
406                 errorOut(e, "SummarySharedCommand", "process");
407                 exit(1);
408         }
409 }
410
411 /***********************************************************/