2 * summarysharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "summarysharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharednseqs.h"
15 #include "sharedjabund.h"
16 #include "sharedsorabund.h"
17 #include "sharedjclass.h"
18 #include "sharedsorclass.h"
19 #include "sharedjest.h"
20 #include "sharedsorest.h"
21 #include "sharedthetayc.h"
22 #include "sharedthetan.h"
23 #include "sharedkstest.h"
24 #include "whittaker.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
38 SummarySharedCommand::SummarySharedCommand(string option){
40 globaldata = GlobalData::getInstance();
46 //allow user to run help
47 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50 //valid paramters for this command
51 string Array[] = {"label","calc","groups","all"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
54 OptionParser parser(option);
55 map<string, string> parameters = parser.getParameters();
57 ValidParameters validParameter;
59 //check to make sure all parameters are valid for command
60 for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
61 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
64 //make sure the user has already run the read.otu command
65 if (globaldata->getSharedFile() == "") {
66 mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); mothurOutEndLine(); abort = true;
69 //check for optional parameter and set defaults
70 // ...at some point should added some additional type checking...
71 label = validParameter.validFile(parameters, "label", false);
72 if (label == "not found") { label = ""; }
74 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
75 else { allLines = 1; }
78 //if the user has not specified any labels use the ones from read.otu
80 allLines = globaldata->allLines;
81 labels = globaldata->labels;
84 calc = validParameter.validFile(parameters, "calc", false);
85 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
87 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
89 splitAtDash(calc, Estimators);
91 groups = validParameter.validFile(parameters, "groups", false);
92 if (groups == "not found") { groups = ""; }
94 splitAtDash(groups, Groups);
95 globaldata->Groups = Groups;
98 string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "true"; }
101 if (abort == false) {
103 validCalculator = new ValidCalculators();
106 for (i=0; i<Estimators.size(); i++) {
107 if (validCalculator->isValidCalculator("sharedsummary", Estimators[i]) == true) {
108 if (Estimators[i] == "sharedsobs") {
109 sumCalculators.push_back(new SharedSobsCS());
110 }else if (Estimators[i] == "sharedchao") {
111 sumCalculators.push_back(new SharedChao1());
112 }else if (Estimators[i] == "sharedace") {
113 sumCalculators.push_back(new SharedAce());
114 }else if (Estimators[i] == "jabund") {
115 sumCalculators.push_back(new JAbund());
116 }else if (Estimators[i] == "sorabund") {
117 sumCalculators.push_back(new SorAbund());
118 }else if (Estimators[i] == "jclass") {
119 sumCalculators.push_back(new Jclass());
120 }else if (Estimators[i] == "sorclass") {
121 sumCalculators.push_back(new SorClass());
122 }else if (Estimators[i] == "jest") {
123 sumCalculators.push_back(new Jest());
124 }else if (Estimators[i] == "sorest") {
125 sumCalculators.push_back(new SorEst());
126 }else if (Estimators[i] == "thetayc") {
127 sumCalculators.push_back(new ThetaYC());
128 }else if (Estimators[i] == "thetan") {
129 sumCalculators.push_back(new ThetaN());
130 }else if (Estimators[i] == "kstest") {
131 sumCalculators.push_back(new KSTest());
132 }else if (Estimators[i] == "sharednseqs") {
133 sumCalculators.push_back(new SharedNSeqs());
134 }else if (Estimators[i] == "ochiai") {
135 sumCalculators.push_back(new Ochiai());
136 }else if (Estimators[i] == "anderberg") {
137 sumCalculators.push_back(new Anderberg());
138 }else if (Estimators[i] == "kulczynski") {
139 sumCalculators.push_back(new Kulczynski());
140 }else if (Estimators[i] == "kulczynskicody") {
141 sumCalculators.push_back(new KulczynskiCody());
142 }else if (Estimators[i] == "lennon") {
143 sumCalculators.push_back(new Lennon());
144 }else if (Estimators[i] == "morisitahorn") {
145 sumCalculators.push_back(new MorHorn());
146 }else if (Estimators[i] == "braycurtis") {
147 sumCalculators.push_back(new BrayCurtis());
148 }else if (Estimators[i] == "whittaker") {
149 sumCalculators.push_back(new Whittaker());
154 outputFileName = ((getRootName(globaldata->inputFileName)) + "shared.summary");
155 openOutputFile(outputFileName, outputFileHandle);
160 catch(exception& e) {
161 errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
166 //**********************************************************************************************************************
168 void SummarySharedCommand::help(){
170 mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
171 mothurOut("The summary.shared command parameters are label, calc and all. No parameters are required.\n");
172 mothurOut("The summary.shared command should be in the following format: \n");
173 mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
174 mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
175 validCalculator->printCalc("sharedsummary", cout);
176 mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
177 mothurOut("The default value for groups is all the groups in your groupfile.\n");
178 mothurOut("The label parameter is used to analyze specific labels in your input.\n");
179 mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is true.\n");
180 mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
181 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
182 mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
184 catch(exception& e) {
185 errorOut(e, "SummarySharedCommand", "help");
190 //**********************************************************************************************************************
192 SummarySharedCommand::~SummarySharedCommand(){
193 if (abort == false) {
195 delete validCalculator;
199 //**********************************************************************************************************************
201 int SummarySharedCommand::execute(){
204 if (abort == true) { return 0; }
206 //if the users entered no valid calculators don't execute command
207 if (sumCalculators.size() == 0) { return 0; }
208 //check if any calcs can do multiples
211 for (int i = 0; i < sumCalculators.size(); i++) {
212 if (sumCalculators[i]->getMultiple() == true) { mult = true; }
218 read = new ReadOTUFile(globaldata->inputFileName);
219 read->read(&*globaldata);
221 input = globaldata->ginput;
222 lookup = input->getSharedRAbundVectors();
223 string lastLabel = lookup[0]->getLabel();
225 //output estimator names as column headers
226 outputFileHandle << "label" <<'\t' << "comparison" << '\t';
227 for(int i=0;i<sumCalculators.size();i++){
228 outputFileHandle << '\t' << sumCalculators[i]->getName();
230 outputFileHandle << endl;
232 //create file and put column headers for multiple groups file
234 outAllFileName = ((getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
235 openOutputFile(outAllFileName, outAll);
237 outAll << "label" <<'\t' << "comparison" << '\t';
238 for(int i=0;i<sumCalculators.size();i++){
239 if (sumCalculators[i]->getMultiple() == true) {
240 outAll << '\t' << sumCalculators[i]->getName();
246 if (lookup.size() < 2) {
247 mothurOut("I cannot run the command without at least 2 valid groups.");
248 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
250 //close files and clean up
251 outputFileHandle.close(); remove(outputFileName.c_str());
252 if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); }
254 //if you only have 2 groups you don't need a .sharedmultiple file
255 }else if ((lookup.size() == 2) && (mult == true)) {
258 remove(outAllFileName.c_str());
261 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
262 set<string> processedLabels;
263 set<string> userLabels = labels;
265 //as long as you are not at the end of the file or done wih the lines you want
266 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
268 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
269 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
272 processedLabels.insert(lookup[0]->getLabel());
273 userLabels.erase(lookup[0]->getLabel());
276 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
277 string saveLabel = lookup[0]->getLabel();
279 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
280 lookup = input->getSharedRAbundVectors(lastLabel);
282 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
285 processedLabels.insert(lookup[0]->getLabel());
286 userLabels.erase(lookup[0]->getLabel());
288 //restore real lastlabel to save below
289 lookup[0]->setLabel(saveLabel);
292 lastLabel = lookup[0]->getLabel();
294 //get next line to process
295 //prevent memory leak
296 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
297 lookup = input->getSharedRAbundVectors();
300 //output error messages about any remaining user labels
301 set<string>::iterator it;
302 bool needToRun = false;
303 for (it = userLabels.begin(); it != userLabels.end(); it++) {
304 mothurOut("Your file does not include the label " + *it);
305 if (processedLabels.count(lastLabel) != 1) {
306 mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
309 mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
313 //run last label if you need to
314 if (needToRun == true) {
315 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
316 lookup = input->getSharedRAbundVectors(lastLabel);
318 mothurOut(lookup[0]->getLabel()); mothurOutEndLine();
320 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
324 //reset groups parameter
325 globaldata->Groups.clear();
328 outputFileHandle.close();
329 if (mult == true) { outAll.close(); }
331 for(int i=0;i<sumCalculators.size();i++){ delete sumCalculators[i]; }
333 delete input; globaldata->ginput = NULL;
337 catch(exception& e) {
338 errorOut(e, "SummarySharedCommand", "execute");
343 /***********************************************************/
344 void SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
346 //loop through calculators and add to file all for all calcs that can do mutiple groups
349 outAll << thisLookup[0]->getLabel() << '\t';
351 //output groups names
352 string outNames = "";
353 for (int j = 0; j < thisLookup.size(); j++) {
354 outNames += thisLookup[j]->getGroup() + "-";
356 outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
357 outAll << outNames << '\t';
359 for(int i=0;i<sumCalculators.size();i++){
360 if (sumCalculators[i]->getMultiple() == true) {
361 sumCalculators[i]->getValues(thisLookup);
363 sumCalculators[i]->print(outAll);
370 vector<SharedRAbundVector*> subset;
371 for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
372 for (int l = n; l < thisLookup.size(); l++) {
374 outputFileHandle << thisLookup[0]->getLabel() << '\t';
376 subset.clear(); //clear out old pair of sharedrabunds
377 //add new pair of sharedrabunds
378 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
380 //sort groups to be alphanumeric
381 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
382 outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
384 outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
387 for(int i=0;i<sumCalculators.size();i++) {
389 sumCalculators[i]->getValues(subset); //saves the calculator outputs
390 outputFileHandle << '\t';
391 sumCalculators[i]->print(outputFileHandle);
393 outputFileHandle << endl;
399 catch(exception& e) {
400 errorOut(e, "SummarySharedCommand", "process");
405 /***********************************************************/