2 * collectsharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
36 //**********************************************************************************************************************
37 vector<string> CollectSharedCommand::getValidParameters(){
39 string AlignArray[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
40 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
44 m->errorOut(e, "CollectSharedCommand", "getValidParameters");
48 //**********************************************************************************************************************
49 vector<string> CollectSharedCommand::getRequiredParameters(){
51 vector<string> myArray;
55 m->errorOut(e, "CollectSharedCommand", "getRequiredParameters");
59 //**********************************************************************************************************************
60 vector<string> CollectSharedCommand::getRequiredFiles(){
62 string AlignArray[] = {"shared"};
63 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
67 m->errorOut(e, "CollectSharedCommand", "getRequiredFiles");
71 //**********************************************************************************************************************
72 CollectSharedCommand::CollectSharedCommand(){
74 //initialize outputTypes
75 vector<string> tempOutNames;
76 outputTypes["sharedchao"] = tempOutNames;
77 outputTypes["sharedsobs"] = tempOutNames;
78 outputTypes["sharedace"] = tempOutNames;
79 outputTypes["jabund"] = tempOutNames;
80 outputTypes["sorabund"] = tempOutNames;
81 outputTypes["jclass"] = tempOutNames;
82 outputTypes["sorclass"] = tempOutNames;
83 outputTypes["jest"] = tempOutNames;
84 outputTypes["sorest"] = tempOutNames;
85 outputTypes["thetayc"] = tempOutNames;
86 outputTypes["thetan"] = tempOutNames;
87 outputTypes["kstest"] = tempOutNames;
88 outputTypes["whittaker"] = tempOutNames;
89 outputTypes["sharednseqs"] = tempOutNames;
90 outputTypes["ochiai"] = tempOutNames;
91 outputTypes["anderberg"] = tempOutNames;
92 outputTypes["skulczynski"] = tempOutNames;
93 outputTypes["kulczynskicody"] = tempOutNames;
94 outputTypes["lennon"] = tempOutNames;
95 outputTypes["morisitahorn"] = tempOutNames;
96 outputTypes["braycurtis"] = tempOutNames;
100 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
104 //**********************************************************************************************************************
105 CollectSharedCommand::CollectSharedCommand(string option) {
107 globaldata = GlobalData::getInstance();
114 //allow user to run help
115 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
118 //valid paramters for this command
119 string Array[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
120 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
122 OptionParser parser(option);
123 map<string,string> parameters=parser.getParameters();
125 ValidParameters validParameter;
127 //check to make sure all parameters are valid for command
128 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
129 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
132 //initialize outputTypes
133 vector<string> tempOutNames;
134 outputTypes["sharedchao"] = tempOutNames;
135 outputTypes["sharedsobs"] = tempOutNames;
136 outputTypes["sharedace"] = tempOutNames;
137 outputTypes["jabund"] = tempOutNames;
138 outputTypes["sorabund"] = tempOutNames;
139 outputTypes["jclass"] = tempOutNames;
140 outputTypes["sorclass"] = tempOutNames;
141 outputTypes["jest"] = tempOutNames;
142 outputTypes["sorest"] = tempOutNames;
143 outputTypes["thetayc"] = tempOutNames;
144 outputTypes["thetan"] = tempOutNames;
145 outputTypes["kstest"] = tempOutNames;
146 outputTypes["whittaker"] = tempOutNames;
147 outputTypes["sharednseqs"] = tempOutNames;
148 outputTypes["ochiai"] = tempOutNames;
149 outputTypes["anderberg"] = tempOutNames;
150 outputTypes["skulczynski"] = tempOutNames;
151 outputTypes["kulczynskicody"] = tempOutNames;
152 outputTypes["lennon"] = tempOutNames;
153 outputTypes["morisitahorn"] = tempOutNames;
154 outputTypes["braycurtis"] = tempOutNames;
157 //if the user changes the output directory command factory will send this info to us in the output parameter
158 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
161 //make sure the user has already run the read.otu command
162 if (globaldata->getSharedFile() == "") {
163 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
164 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
168 //check for optional parameter and set defaults
169 // ...at some point should added some additional type checking..
170 label = validParameter.validFile(parameters, "label", false);
171 if (label == "not found") { label = ""; }
173 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
174 else { allLines = 1; }
177 //if the user has not specified any labels use the ones from read.otu
179 allLines = globaldata->allLines;
180 labels = globaldata->labels;
183 calc = validParameter.validFile(parameters, "calc", false);
184 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
186 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
188 m->splitAtDash(calc, Estimators);
190 groups = validParameter.validFile(parameters, "groups", false);
191 if (groups == "not found") { groups = ""; }
193 m->splitAtDash(groups, Groups);
195 globaldata->Groups = Groups;
198 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
201 temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
202 all = m->isTrue(temp);
204 if (abort == false) {
206 if (outputDir == "") { outputDir += m->hasPath(globaldata->inputFileName); }
207 string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName));
208 format = globaldata->getFormat();
211 validCalculator = new ValidCalculators();
212 util = new SharedUtil();
214 for (i=0; i<Estimators.size(); i++) {
215 if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) {
216 if (Estimators[i] == "sharedchao") {
217 cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
218 outputNames.push_back(fileNameRoot+"shared.chao"); outputTypes["sharedchao"].push_back(fileNameRoot+"shared.chao");
219 }else if (Estimators[i] == "sharedsobs") {
220 cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
221 outputNames.push_back(fileNameRoot+"shared.sobs"); outputTypes["sharedsobs"].push_back(fileNameRoot+"shared.sobs");
222 }else if (Estimators[i] == "sharedace") {
223 cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
224 outputNames.push_back(fileNameRoot+"shared.ace"); outputTypes["sharedace"].push_back(fileNameRoot+"shared.ace");
225 }else if (Estimators[i] == "jabund") {
226 cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
227 outputNames.push_back(fileNameRoot+"jabund"); outputTypes["jabund"].push_back(fileNameRoot+"jabund");
228 }else if (Estimators[i] == "sorabund") {
229 cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
230 outputNames.push_back(fileNameRoot+"sorabund"); outputTypes["sorabund"].push_back(fileNameRoot+"sorabund");
231 }else if (Estimators[i] == "jclass") {
232 cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
233 outputNames.push_back(fileNameRoot+"jclass"); outputTypes["jclass"].push_back(fileNameRoot+"jclass");
234 }else if (Estimators[i] == "sorclass") {
235 cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
236 outputNames.push_back(fileNameRoot+"sorclass"); outputTypes["sorclass"].push_back(fileNameRoot+"sorclass");
237 }else if (Estimators[i] == "jest") {
238 cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
239 outputNames.push_back(fileNameRoot+"jest"); outputTypes["jest"].push_back(fileNameRoot+"jest");
240 }else if (Estimators[i] == "sorest") {
241 cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
242 outputNames.push_back(fileNameRoot+"sorest"); outputTypes["sorest"].push_back(fileNameRoot+"sorest");
243 }else if (Estimators[i] == "thetayc") {
244 cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
245 outputNames.push_back(fileNameRoot+"thetayc"); outputTypes["thetayc"].push_back(fileNameRoot+"thetayc");
246 }else if (Estimators[i] == "thetan") {
247 cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
248 outputNames.push_back(fileNameRoot+"thetan"); outputTypes["thetan"].push_back(fileNameRoot+"thetan");
249 }else if (Estimators[i] == "kstest") {
250 cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
251 outputNames.push_back(fileNameRoot+"kstest"); outputTypes["kstest"].push_back(fileNameRoot+"kstest");
252 }else if (Estimators[i] == "whittaker") {
253 cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
254 outputNames.push_back(fileNameRoot+"whittaker"); outputTypes["whittaker"].push_back(fileNameRoot+"whittaker");
255 }else if (Estimators[i] == "sharednseqs") {
256 cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
257 outputNames.push_back(fileNameRoot+"shared.nseqs"); outputTypes["shared.nseqs"].push_back(fileNameRoot+"shared.nseqs");
258 }else if (Estimators[i] == "ochiai") {
259 cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
260 outputNames.push_back(fileNameRoot+"ochiai"); outputTypes["ochiai"].push_back(fileNameRoot+"ochiai");
261 }else if (Estimators[i] == "anderberg") {
262 cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
263 outputNames.push_back(fileNameRoot+"anderberg"); outputTypes["anderberg"].push_back(fileNameRoot+"anderberg");
264 }else if (Estimators[i] == "skulczynski") {
265 cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
266 outputNames.push_back(fileNameRoot+"kulczynski"); outputTypes["kulczynski"].push_back(fileNameRoot+"kulczynski");
267 }else if (Estimators[i] == "kulczynskicody") {
268 cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
269 outputNames.push_back(fileNameRoot+"kulczynskicody"); outputTypes["kulczynskicody"].push_back(fileNameRoot+"kulczynskicody");
270 }else if (Estimators[i] == "lennon") {
271 cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
272 outputNames.push_back(fileNameRoot+"lennon"); outputTypes["lennon"].push_back(fileNameRoot+"lennon");
273 }else if (Estimators[i] == "morisitahorn") {
274 cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
275 outputNames.push_back(fileNameRoot+"morisitahorn"); outputTypes["morisitahorn"].push_back(fileNameRoot+"morisitahorn");
276 }else if (Estimators[i] == "braycurtis") {
277 cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
278 outputNames.push_back(fileNameRoot+"braycurtis"); outputTypes["braycurtis"].push_back(fileNameRoot+"braycurtis");
286 catch(exception& e) {
287 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
291 //**********************************************************************************************************************
293 void CollectSharedCommand::help(){
295 m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
296 m->mothurOut("The collect.shared command parameters are label, freq, calc and groups. No parameters are required \n");
297 m->mothurOut("The collect.shared command should be in the following format: \n");
298 m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
299 m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
300 m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
301 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
302 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
303 validCalculator->printCalc("shared", cout);
304 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
305 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
306 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
307 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
308 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
311 catch(exception& e) {
312 m->errorOut(e, "CollectSharedCommand", "help");
317 //**********************************************************************************************************************
319 CollectSharedCommand::~CollectSharedCommand(){
320 if (abort == false) {
321 delete input; globaldata->ginput = NULL;
324 delete validCalculator;
325 globaldata->gorder = NULL;
329 //**********************************************************************************************************************
331 int CollectSharedCommand::execute(){
334 if (abort == true) { return 0; }
336 //if the users entered no valid calculators don't execute command
337 if (cDisplays.size() == 0) { return 0; }
338 for(int i=0;i<cDisplays.size();i++){ cDisplays[i]->setAll(all); }
340 read = new ReadOTUFile(globaldata->inputFileName);
341 read->read(&*globaldata);
343 input = globaldata->ginput;
344 order = input->getSharedOrderVector();
345 string lastLabel = order->getLabel();
347 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
348 set<string> processedLabels;
349 set<string> userLabels = labels;
352 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
353 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
355 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
356 if (m->control_pressed) {
357 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
358 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
360 globaldata->Groups.clear();
364 if(allLines == 1 || labels.count(order->getLabel()) == 1){
366 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
367 //create collectors curve
368 cCurve = new Collect(order, cDisplays);
369 cCurve->getSharedCurve(freq);
372 processedLabels.insert(order->getLabel());
373 userLabels.erase(order->getLabel());
376 //you have a label the user want that is smaller than this label and the last label has not already been processed
377 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
378 string saveLabel = order->getLabel();
381 order = input->getSharedOrderVector(lastLabel);
383 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
384 //create collectors curve
385 cCurve = new Collect(order, cDisplays);
386 cCurve->getSharedCurve(freq);
389 processedLabels.insert(order->getLabel());
390 userLabels.erase(order->getLabel());
392 //restore real lastlabel to save below
393 order->setLabel(saveLabel);
397 lastLabel = order->getLabel();
399 //get next line to process
401 order = input->getSharedOrderVector();
404 if (m->control_pressed) {
405 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
406 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
407 globaldata->Groups.clear();
411 //output error messages about any remaining user labels
412 set<string>::iterator it;
413 bool needToRun = false;
414 for (it = userLabels.begin(); it != userLabels.end(); it++) {
415 m->mothurOut("Your file does not include the label " + *it);
416 if (processedLabels.count(lastLabel) != 1) {
417 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
420 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
424 //run last label if you need to
425 if (needToRun == true) {
426 if (order != NULL) { delete order; }
427 order = input->getSharedOrderVector(lastLabel);
429 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
430 cCurve = new Collect(order, cDisplays);
431 cCurve->getSharedCurve(freq);
434 if (m->control_pressed) {
435 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
436 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
438 globaldata->Groups.clear();
445 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
447 //reset groups parameter
448 globaldata->Groups.clear();
450 m->mothurOutEndLine();
451 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
452 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
453 m->mothurOutEndLine();
458 catch(exception& e) {
459 m->errorOut(e, "CollectSharedCommand", "execute");
464 /***********************************************************/