2 * collectsharedcommand.cpp
5 * Created by Sarah Westcott on 1/2/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
37 //**********************************************************************************************************************
39 CollectSharedCommand::CollectSharedCommand(string option) {
41 globaldata = GlobalData::getInstance();
48 //allow user to run help
49 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
52 //valid paramters for this command
53 string Array[] = {"freq","label","calc","groups","all","outputdir","inputdir"};
54 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56 OptionParser parser(option);
57 map<string,string> parameters=parser.getParameters();
59 ValidParameters validParameter;
61 //check to make sure all parameters are valid for command
62 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
63 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
66 //if the user changes the output directory command factory will send this info to us in the output parameter
67 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
70 //make sure the user has already run the read.otu command
71 if (globaldata->getSharedFile() == "") {
72 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
73 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
77 //check for optional parameter and set defaults
78 // ...at some point should added some additional type checking..
79 label = validParameter.validFile(parameters, "label", false);
80 if (label == "not found") { label = ""; }
82 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
83 else { allLines = 1; }
86 //if the user has not specified any labels use the ones from read.otu
88 allLines = globaldata->allLines;
89 labels = globaldata->labels;
92 calc = validParameter.validFile(parameters, "calc", false);
93 if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
95 if (calc == "default") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; }
97 splitAtDash(calc, Estimators);
99 groups = validParameter.validFile(parameters, "groups", false);
100 if (groups == "not found") { groups = ""; }
102 splitAtDash(groups, Groups);
104 globaldata->Groups = Groups;
107 temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
110 temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; }
113 if (abort == false) {
115 if (outputDir == "") { outputDir += hasPath(globaldata->inputFileName); }
116 string fileNameRoot = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
117 format = globaldata->getFormat();
120 validCalculator = new ValidCalculators();
121 util = new SharedUtil();
123 for (i=0; i<Estimators.size(); i++) {
124 if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) {
125 if (Estimators[i] == "sharedchao") {
126 cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
127 outputNames.push_back(fileNameRoot+"shared.chao");
128 }else if (Estimators[i] == "sharedsobs") {
129 cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
130 outputNames.push_back(fileNameRoot+"shared.sobs");
131 }else if (Estimators[i] == "sharedace") {
132 cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
133 outputNames.push_back(fileNameRoot+"shared.ace");
134 }else if (Estimators[i] == "jabund") {
135 cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
136 outputNames.push_back(fileNameRoot+"jabund");
137 }else if (Estimators[i] == "sorabund") {
138 cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
139 outputNames.push_back(fileNameRoot+"sorabund");
140 }else if (Estimators[i] == "jclass") {
141 cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
142 outputNames.push_back(fileNameRoot+"jclass");
143 }else if (Estimators[i] == "sorclass") {
144 cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
145 outputNames.push_back(fileNameRoot+"sorclass");
146 }else if (Estimators[i] == "jest") {
147 cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
148 outputNames.push_back(fileNameRoot+"jest");
149 }else if (Estimators[i] == "sorest") {
150 cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
151 outputNames.push_back(fileNameRoot+"sorest");
152 }else if (Estimators[i] == "thetayc") {
153 cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
154 outputNames.push_back(fileNameRoot+"thetayc");
155 }else if (Estimators[i] == "thetan") {
156 cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
157 outputNames.push_back(fileNameRoot+"thetan");
158 }else if (Estimators[i] == "kstest") {
159 cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
160 outputNames.push_back(fileNameRoot+"kstest");
161 }else if (Estimators[i] == "whittaker") {
162 cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
163 outputNames.push_back(fileNameRoot+"whittaker");
164 }else if (Estimators[i] == "sharednseqs") {
165 cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
166 outputNames.push_back(fileNameRoot+"shared.nseqs");
167 }else if (Estimators[i] == "ochiai") {
168 cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
169 outputNames.push_back(fileNameRoot+"ochiai");
170 }else if (Estimators[i] == "anderberg") {
171 cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
172 outputNames.push_back(fileNameRoot+"anderberg");
173 }else if (Estimators[i] == "skulczynski") {
174 cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
175 outputNames.push_back(fileNameRoot+"kulczynski");
176 }else if (Estimators[i] == "kulczynskicody") {
177 cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
178 outputNames.push_back(fileNameRoot+"kulczynskicody");
179 }else if (Estimators[i] == "lennon") {
180 cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
181 outputNames.push_back(fileNameRoot+"lennon");
182 }else if (Estimators[i] == "morisitahorn") {
183 cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
184 outputNames.push_back(fileNameRoot+"morisitahorn");
185 }else if (Estimators[i] == "braycurtis") {
186 cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
187 outputNames.push_back(fileNameRoot+"braycurtis");
195 catch(exception& e) {
196 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
200 //**********************************************************************************************************************
202 void CollectSharedCommand::help(){
204 m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
205 m->mothurOut("The collect.shared command parameters are label, freq, calc and groups. No parameters are required \n");
206 m->mothurOut("The collect.shared command should be in the following format: \n");
207 m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
208 m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
209 m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
210 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
211 validCalculator->printCalc("shared", cout);
212 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
213 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together. This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
214 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
215 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n");
216 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
219 catch(exception& e) {
220 m->errorOut(e, "CollectSharedCommand", "help");
225 //**********************************************************************************************************************
227 CollectSharedCommand::~CollectSharedCommand(){
228 if (abort == false) {
229 delete input; globaldata->ginput = NULL;
232 delete validCalculator;
233 globaldata->gorder = NULL;
237 //**********************************************************************************************************************
239 int CollectSharedCommand::execute(){
242 if (abort == true) { return 0; }
244 //if the users entered no valid calculators don't execute command
245 if (cDisplays.size() == 0) { return 0; }
246 for(int i=0;i<cDisplays.size();i++){ cDisplays[i]->setAll(all); }
248 read = new ReadOTUFile(globaldata->inputFileName);
249 read->read(&*globaldata);
251 input = globaldata->ginput;
252 order = input->getSharedOrderVector();
253 string lastLabel = order->getLabel();
255 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
256 set<string> processedLabels;
257 set<string> userLabels = labels;
260 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
261 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
263 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
264 if (m->control_pressed) {
265 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
266 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
268 globaldata->Groups.clear();
272 if(allLines == 1 || labels.count(order->getLabel()) == 1){
274 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
275 //create collectors curve
276 cCurve = new Collect(order, cDisplays);
277 cCurve->getSharedCurve(freq);
280 processedLabels.insert(order->getLabel());
281 userLabels.erase(order->getLabel());
284 //you have a label the user want that is smaller than this label and the last label has not already been processed
285 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
286 string saveLabel = order->getLabel();
289 order = input->getSharedOrderVector(lastLabel);
291 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
292 //create collectors curve
293 cCurve = new Collect(order, cDisplays);
294 cCurve->getSharedCurve(freq);
297 processedLabels.insert(order->getLabel());
298 userLabels.erase(order->getLabel());
300 //restore real lastlabel to save below
301 order->setLabel(saveLabel);
305 lastLabel = order->getLabel();
307 //get next line to process
309 order = input->getSharedOrderVector();
312 if (m->control_pressed) {
313 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
314 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
315 globaldata->Groups.clear();
319 //output error messages about any remaining user labels
320 set<string>::iterator it;
321 bool needToRun = false;
322 for (it = userLabels.begin(); it != userLabels.end(); it++) {
323 m->mothurOut("Your file does not include the label " + *it);
324 if (processedLabels.count(lastLabel) != 1) {
325 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
328 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
332 //run last label if you need to
333 if (needToRun == true) {
334 if (order != NULL) { delete order; }
335 order = input->getSharedOrderVector(lastLabel);
337 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
338 cCurve = new Collect(order, cDisplays);
339 cCurve->getSharedCurve(freq);
342 if (m->control_pressed) {
343 for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }
344 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
346 globaldata->Groups.clear();
353 for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
355 //reset groups parameter
356 globaldata->Groups.clear();
358 m->mothurOutEndLine();
359 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
360 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
361 m->mothurOutEndLine();
366 catch(exception& e) {
367 m->errorOut(e, "CollectSharedCommand", "execute");
372 /***********************************************************/