2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option) {
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all","outputdir","inputdir"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true;
69 //if the user changes the output directory command factory will send this info to us in the output parameter
70 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
72 outputDir += hasPath(globaldata->getSharedFile()); //if user entered a file with a path then preserve it
75 //check for optional parameter and set defaults
76 // ...at some point should added some additional type checking...
77 label = validParameter.validFile(parameters, "label", false);
78 if (label == "not found") { label = ""; }
80 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
81 else { allLines = 1; }
84 //if the user has not specified any labels use the ones from read.otu
86 allLines = globaldata->allLines;
87 labels = globaldata->labels;
90 calc = validParameter.validFile(parameters, "calc", false);
91 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
93 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 splitAtDash(calc, Estimators);
97 groups = validParameter.validFile(parameters, "groups", false);
98 if (groups == "not found") { groups = ""; }
100 splitAtDash(groups, Groups);
101 globaldata->Groups = Groups;
104 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
107 if (abort == false) {
109 validCalculator = new ValidCalculators();
112 for (i=0; i<Estimators.size(); i++) {
113 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
114 if (Estimators[i] == "sharedsobs") {
115 sumCalculators.push_back(new SharedSobsCS());
116 }else if (Estimators[i] == "sharedchao") {
117 sumCalculators.push_back(new SharedChao1());
118 }else if (Estimators[i] == "sharedace") {
119 sumCalculators.push_back(new SharedAce());
120 }else if (Estimators[i] == "jabund") {
121 sumCalculators.push_back(new JAbund());
122 }else if (Estimators[i] == "sorabund") {
123 sumCalculators.push_back(new SorAbund());
124 }else if (Estimators[i] == "jclass") {
125 sumCalculators.push_back(new Jclass());
126 }else if (Estimators[i] == "sorclass") {
127 sumCalculators.push_back(new SorClass());
128 }else if (Estimators[i] == "jest") {
129 sumCalculators.push_back(new Jest());
130 }else if (Estimators[i] == "sorest") {
131 sumCalculators.push_back(new SorEst());
132 }else if (Estimators[i] == "thetayc") {
133 sumCalculators.push_back(new ThetaYC());
134 }else if (Estimators[i] == "thetan") {
135 sumCalculators.push_back(new ThetaN());
136 }else if (Estimators[i] == "kstest") {
137 sumCalculators.push_back(new KSTest());
138 }else if (Estimators[i] == "sharednseqs") {
139 sumCalculators.push_back(new SharedNSeqs());
140 }else if (Estimators[i] == "ochiai") {
141 sumCalculators.push_back(new Ochiai());
142 }else if (Estimators[i] == "anderberg") {
143 sumCalculators.push_back(new Anderberg());
144 }else if (Estimators[i] == "kulczynski") {
145 sumCalculators.push_back(new Kulczynski());
146 }else if (Estimators[i] == "kulczynskicody") {
147 sumCalculators.push_back(new KulczynskiCody());
148 }else if (Estimators[i] == "lennon") {
149 sumCalculators.push_back(new Lennon());
150 }else if (Estimators[i] == "morisitahorn") {
151 sumCalculators.push_back(new MorHorn());
152 }else if (Estimators[i] == "braycurtis") {
153 sumCalculators.push_back(new BrayCurtis());
154 }else if (Estimators[i] == "whittaker") {
155 sumCalculators.push_back(new Whittaker());
160 outputFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "shared.summary";
161 openOutputFile(outputFileName, outputFileHandle);
162 outputNames.push_back(outputFileName);
168 catch(exception& e) {
169 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
174 //**********************************************************************************************************************
176 void SummarySharedCommand::help(){
178 m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
179 m->mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
180 m->mothurOut("The summary.shared command should be in the following format: \n");
181 m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
182 m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
183 validCalculator->printCalc("sharedsummary", cout);
184 m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
185 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
186 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
187 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
188 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
189 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
190 m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
192 catch(exception& e) {
193 m->errorOut(e, "SummarySharedCommand", "help");
198 //**********************************************************************************************************************
200 SummarySharedCommand::~SummarySharedCommand(){
201 if (abort == false) {
203 delete validCalculator;
207 //**********************************************************************************************************************
209 int SummarySharedCommand::execute(){
212 if (abort == true) { return 0; }
214 //if the users entered no valid calculators don't execute command
215 if (sumCalculators.size() == 0) { return 0; }
216 //check if any calcs can do multiples
219 for (int i = 0; i < sumCalculators.size(); i++) {
220 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
226 read = new ReadOTUFile(globaldata->inputFileName);
227 read->read(&*globaldata);
229 input = globaldata->ginput;
230 lookup = input->getSharedRAbundVectors();
231 string lastLabel = lookup[0]->getLabel();
233 //output estimator names as column headers
234 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
235 for(int i=0;i<sumCalculators.size();i++){
236 outputFileHandle << '\t' << sumCalculators[i]->getName();
238 outputFileHandle << endl;
240 //create file and put column headers for multiple groups file
242 outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
243 openOutputFile(outAllFileName, outAll);
244 outputNames.push_back(outAllFileName);
246 outAll << "label" <<'\t' << "comparison" << '\t';
247 for(int i=0;i<sumCalculators.size();i++){
248 if (sumCalculators[i]->getMultiple() == true) {
249 outAll << '\t' << sumCalculators[i]->getName();
255 if (lookup.size() < 2) {
256 m->mothurOut("I cannot run the command without at least 2 valid groups.");
257 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
259 //close files and clean up
260 outputFileHandle.close(); remove(outputFileName.c_str());
261 if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
263 //if you only have 2 groups you don't need a .sharedmultiple file
264 }else if ((lookup.size() == 2) && (mult == true)) {
267 remove(outAllFileName.c_str());
268 outputNames.pop_back();
271 if (m->control_pressed) {
272 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
273 outputFileHandle.close(); remove(outputFileName.c_str());
274 delete input; globaldata->ginput = NULL;
275 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
276 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
277 globaldata->Groups.clear();
282 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
283 set<string> processedLabels;
284 set<string> userLabels = labels;
286 //as long as you are not at the end of the file or done wih the lines you want
287 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
288 if (m->control_pressed) {
289 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
290 outputFileHandle.close(); remove(outputFileName.c_str());
291 delete input; globaldata->ginput = NULL;
292 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
293 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
294 globaldata->Groups.clear();
299 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
300 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
303 processedLabels.insert(lookup[0]->getLabel());
304 userLabels.erase(lookup[0]->getLabel());
307 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
308 string saveLabel = lookup[0]->getLabel();
310 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
311 lookup = input->getSharedRAbundVectors(lastLabel);
313 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
316 processedLabels.insert(lookup[0]->getLabel());
317 userLabels.erase(lookup[0]->getLabel());
319 //restore real lastlabel to save below
320 lookup[0]->setLabel(saveLabel);
323 lastLabel = lookup[0]->getLabel();
325 //get next line to process
326 //prevent memory leak
327 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
328 lookup = input->getSharedRAbundVectors();
331 if (m->control_pressed) {
332 if (mult) { outAll.close(); remove(outAllFileName.c_str()); }
333 outputFileHandle.close(); remove(outputFileName.c_str());
334 delete input; globaldata->ginput = NULL;
335 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
336 globaldata->Groups.clear();
340 //output error messages about any remaining user labels
341 set<string>::iterator it;
342 bool needToRun = false;
343 for (it = userLabels.begin(); it != userLabels.end(); it++) {
344 m->mothurOut("Your file does not include the label " + *it);
345 if (processedLabels.count(lastLabel) != 1) {
346 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
349 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
353 //run last label if you need to
354 if (needToRun == true) {
355 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
356 lookup = input->getSharedRAbundVectors(lastLabel);
358 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
360 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
364 //reset groups parameter
365 globaldata->Groups.clear();
368 outputFileHandle.close();
369 if (mult == true) { outAll.close(); }
371 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
372 delete input; globaldata->ginput = NULL;
374 if (m->control_pressed) {
375 remove(outAllFileName.c_str());
376 remove(outputFileName.c_str());
380 m->mothurOutEndLine();
381 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
382 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
383 m->mothurOutEndLine();
387 catch(exception& e) {
388 m->errorOut(e, "SummarySharedCommand", "execute");
393 /***********************************************************/
394 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
396 //loop through calculators and add to file all for all calcs that can do mutiple groups
399 outAll << thisLookup[0]->getLabel() << '\t';
401 //output groups names
402 string outNames = "";
403 for (int j = 0; j < thisLookup.size(); j++) {
404 outNames += thisLookup[j]->getGroup() + "-";
406 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
407 outAll << outNames << '\t';
409 for(int i=0;i<sumCalculators.size();i++){
410 if (sumCalculators[i]->getMultiple() == true) {
411 sumCalculators[i]->getValues(thisLookup);
413 if (m->control_pressed) { return 1; }
416 sumCalculators[i]->print(outAll);
423 vector<SharedRAbundVector*> subset;
424 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
425 for (int l = n; l < thisLookup.size(); l++) {
427 outputFileHandle << thisLookup[0]->getLabel() << '\t';
429 subset.clear(); //clear out old pair of sharedrabunds
430 //add new pair of sharedrabunds
431 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
433 //sort groups to be alphanumeric
434 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
435 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
437 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
440 for(int i=0;i<sumCalculators.size();i++) {
442 sumCalculators[i]->getValues(subset); //saves the calculator outputs
444 if (m->control_pressed) { return 1; }
446 outputFileHandle << '\t';
447 sumCalculators[i]->print(outputFileHandle);
449 outputFileHandle << endl;
455 catch(exception& e) {
456 m->errorOut(e, "SummarySharedCommand", "process");
461 /***********************************************************/