2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option) {
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true;
69 //if the user changes the output directory command factory will send this info to us in the output parameter
70 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
72 outputDir += m->hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it
75 //check for optional parameter and set defaults
76 // ...at some point should added some additional type checking...
77 label = validParameter.validFile(parameters, "label", false);
78 if (label == "not found") { label = ""; }
80 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
81 else { allLines = 1; }
84 //if the user has not specified any labels use the ones from read.otu
86 allLines = globaldata->allLines;
87 labels = globaldata->labels;
90 calc = validParameter.validFile(parameters, "calc", false);
91 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
93 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 m->splitAtDash(calc, Estimators);
97 groups = validParameter.validFile(parameters, "groups", false);
98 if (groups == "not found") { groups = ""; }
100 m->splitAtDash(groups, Groups);
101 globaldata->Groups = Groups;
104 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
105 all = m->isTrue(temp);
107 if (abort == false) {
109 validCalculator = new ValidCalculators();
112 for (i=0; i<Estimators.size(); i++) {
113 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
114 if (Estimators[i] == "sharedsobs") {
115 sumCalculators.push_back(new SharedSobsCS());
116 }else if (Estimators[i] == "sharedchao") {
117 sumCalculators.push_back(new SharedChao1());
118 }else if (Estimators[i] == "sharedace") {
119 sumCalculators.push_back(new SharedAce());
120 }else if (Estimators[i] == "jabund") {
121 sumCalculators.push_back(new JAbund());
122 }else if (Estimators[i] == "sorabund") {
123 sumCalculators.push_back(new SorAbund());
124 }else if (Estimators[i] == "jclass") {
125 sumCalculators.push_back(new Jclass());
126 }else if (Estimators[i] == "sorclass") {
127 sumCalculators.push_back(new SorClass());
128 }else if (Estimators[i] == "jest") {
129 sumCalculators.push_back(new Jest());
130 }else if (Estimators[i] == "sorest") {
131 sumCalculators.push_back(new SorEst());
132 }else if (Estimators[i] == "thetayc") {
133 sumCalculators.push_back(new ThetaYC());
134 }else if (Estimators[i] == "thetan") {
135 sumCalculators.push_back(new ThetaN());
136 }else if (Estimators[i] == "kstest") {
137 sumCalculators.push_back(new KSTest());
138 }else if (Estimators[i] == "sharednseqs") {
139 sumCalculators.push_back(new SharedNSeqs());
140 }else if (Estimators[i] == "ochiai") {
141 sumCalculators.push_back(new Ochiai());
142 }else if (Estimators[i] == "anderberg") {
143 sumCalculators.push_back(new Anderberg());
144 }else if (Estimators[i] == "kulczynski") {
145 sumCalculators.push_back(new Kulczynski());
146 }else if (Estimators[i] == "kulczynskicody") {
147 sumCalculators.push_back(new KulczynskiCody());
148 }else if (Estimators[i] == "lennon") {
149 sumCalculators.push_back(new Lennon());
150 }else if (Estimators[i] == "morisitahorn") {
151 sumCalculators.push_back(new MorHorn());
152 }else if (Estimators[i] == "braycurtis") {
153 sumCalculators.push_back(new BrayCurtis());
154 }else if (Estimators[i] == "whittaker") {
155 sumCalculators.push_back(new Whittaker());
160 outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
161 m->openOutputFile(outputFileName, outputFileHandle);
162 outputNames.push_back(outputFileName);
168 catch(exception& e) {
169 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
174 //**********************************************************************************************************************
176 void SummarySharedCommand::help(){
178 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179 m->mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
180 m->mothurOut("The summary.shared command should be in the following format: \n");
181 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183 validCalculator->printCalc("sharedsummary", cout);
184 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
190 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
192 catch(exception& e) {
193 m->errorOut(e, "SummarySharedCommand", "help");
198 //**********************************************************************************************************************
200 SummarySharedCommand::~SummarySharedCommand(){
201 if (abort == false) {
203 delete validCalculator;
207 //**********************************************************************************************************************
209 int SummarySharedCommand::execute(){
212 if (abort == true) { return 0; }
214 //if the users entered no valid calculators don't execute command
215 if (sumCalculators.size() == 0) { return 0; }
216 //check if any calcs can do multiples
219 for (int i = 0; i < sumCalculators.size(); i++) {
220 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
226 read = new ReadOTUFile(globaldata->inputFileName);
227 read->read(&*globaldata);
229 input = globaldata->ginput;
230 lookup = input->getSharedRAbundVectors();
231 string lastLabel = lookup[0]->getLabel();
233 //output estimator names as column headers
234 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
235 for(int i=0;i<sumCalculators.size();i++){
236 outputFileHandle << '\t' << sumCalculators[i]->getName();
237 if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
239 outputFileHandle << endl;
241 //create file and put column headers for multiple groups file
243 outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
244 m->openOutputFile(outAllFileName, outAll);
245 outputNames.push_back(outAllFileName);
247 outAll << "label" <<'\t' << "comparison" << '\t';
248 for(int i=0;i<sumCalculators.size();i++){
249 if (sumCalculators[i]->getMultiple() == true) {
250 outAll << '\t' << sumCalculators[i]->getName();
256 if (lookup.size() < 2) {
257 m->mothurOut("I cannot run the command without at least 2 valid groups.");
258 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
260 //close files and clean up
261 outputFileHandle.close(); remove(outputFileName.c_str());
262 if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
264 //if you only have 2 groups you don't need a .sharedmultiple file
265 }else if ((lookup.size() == 2) && (mult == true)) {
268 remove(outAllFileName.c_str());
269 outputNames.pop_back();
272 if (m->control_pressed) {
273 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
274 outputFileHandle.close(); remove(outputFileName.c_str());
275 delete input; globaldata->ginput = NULL;
276 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
277 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
278 globaldata->Groups.clear();
283 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
284 set<string> processedLabels;
285 set<string> userLabels = labels;
287 //as long as you are not at the end of the file or done wih the lines you want
288 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
289 if (m->control_pressed) {
290 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
291 outputFileHandle.close(); remove(outputFileName.c_str());
292 delete input; globaldata->ginput = NULL;
293 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
294 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
295 globaldata->Groups.clear();
300 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
301 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
304 processedLabels.insert(lookup[0]->getLabel());
305 userLabels.erase(lookup[0]->getLabel());
308 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
309 string saveLabel = lookup[0]->getLabel();
311 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
312 lookup = input->getSharedRAbundVectors(lastLabel);
314 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
317 processedLabels.insert(lookup[0]->getLabel());
318 userLabels.erase(lookup[0]->getLabel());
320 //restore real lastlabel to save below
321 lookup[0]->setLabel(saveLabel);
324 lastLabel = lookup[0]->getLabel();
326 //get next line to process
327 //prevent memory leak
328 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
329 lookup = input->getSharedRAbundVectors();
332 if (m->control_pressed) {
333 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
334 outputFileHandle.close(); remove(outputFileName.c_str());
335 delete input; globaldata->ginput = NULL;
336 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
337 globaldata->Groups.clear();
341 //output error messages about any remaining user labels
342 set<string>::iterator it;
343 bool needToRun = false;
344 for (it = userLabels.begin(); it != userLabels.end(); it++) {
345 m->mothurOut("Your file does not include the label " + *it);
346 if (processedLabels.count(lastLabel) != 1) {
347 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
350 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
354 //run last label if you need to
355 if (needToRun == true) {
356 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
357 lookup = input->getSharedRAbundVectors(lastLabel);
359 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
361 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
365 //reset groups parameter
366 globaldata->Groups.clear();
369 outputFileHandle.close();
370 if (mult == true) { outAll.close(); }
372 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
373 delete input; globaldata->ginput = NULL;
375 if (m->control_pressed) {
376 remove(outAllFileName.c_str());
377 remove(outputFileName.c_str());
381 m->mothurOutEndLine();
382 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
384 m->mothurOutEndLine();
388 catch(exception& e) {
389 m->errorOut(e, "SummarySharedCommand", "execute");
394 /***********************************************************/
395 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
397 //loop through calculators and add to file all for all calcs that can do mutiple groups
400 outAll << thisLookup[0]->getLabel() << '\t';
402 //output groups names
403 string outNames = "";
404 for (int j = 0; j < thisLookup.size(); j++) {
405 outNames += thisLookup[j]->getGroup() + "-";
407 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
408 outAll << outNames << '\t';
410 for(int i=0;i<sumCalculators.size();i++){
411 if (sumCalculators[i]->getMultiple() == true) {
412 sumCalculators[i]->getValues(thisLookup);
414 if (m->control_pressed) { return 1; }
417 sumCalculators[i]->print(outAll);
424 vector<SharedRAbundVector*> subset;
425 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to compare
427 for (int l = n; l < thisLookup.size(); l++) {
429 outputFileHandle << thisLookup[0]->getLabel() << '\t';
431 subset.clear(); //clear out old pair of sharedrabunds
432 //add new pair of sharedrabunds
433 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
435 //sort groups to be alphanumeric
436 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
437 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
439 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
442 for(int i=0;i<sumCalculators.size();i++) {
444 sumCalculators[i]->getValues(subset); //saves the calculator outputs
446 if (m->control_pressed) { return 1; }
448 outputFileHandle << '\t';
449 sumCalculators[i]->print(outputFileHandle);
451 outputFileHandle << endl;
457 catch(exception& e) {
458 m->errorOut(e, "SummarySharedCommand", "process");
463 /***********************************************************/