]> git.donarmstrong.com Git - mothur.git/blob - collectsharedcommand.cpp
1.10.0
[mothur.git] / collectsharedcommand.cpp
1 /*
2  *  collectsharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36
37 //**********************************************************************************************************************
38
39 CollectSharedCommand::CollectSharedCommand(string option)  {
40         try {
41                 globaldata = GlobalData::getInstance();
42                 abort = false;
43                 allLines = 1;
44                 labels.clear();
45                 Estimators.clear();
46                 Groups.clear();
47                 
48                 //allow user to run help
49                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50                 
51                 else {
52                         //valid paramters for this command
53                         string Array[] =  {"freq","label","calc","groups","all","outputdir","inputdir"};
54                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                         
56                         OptionParser parser(option);
57                         map<string,string> parameters=parser.getParameters();
58                         
59                         ValidParameters validParameter;
60                 
61                         //check to make sure all parameters are valid for command
62                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
63                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
64                         }
65                         
66                         //if the user changes the output directory command factory will send this info to us in the output parameter 
67                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
68                         
69                                                 
70                         //make sure the user has already run the read.otu command
71                         if (globaldata->getSharedFile() == "") {
72                                 if (globaldata->getListFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
73                                 else if (globaldata->getGroupFile() == "") { m->mothurOut("You must read a list and a group, or a shared before you can use the collect.shared command."); m->mothurOutEndLine(); abort = true; }
74                         }
75
76                         
77                         //check for optional parameter and set defaults
78                         // ...at some point should added some additional type checking..
79                         label = validParameter.validFile(parameters, "label", false);                   
80                         if (label == "not found") { label = ""; }
81                         else { 
82                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
83                                 else { allLines = 1;  }
84                         }
85                         
86                         //if the user has not specified any labels use the ones from read.otu
87                         if(label == "") {  
88                                 allLines = globaldata->allLines; 
89                                 labels = globaldata->labels; 
90                         }
91                                 
92                         calc = validParameter.validFile(parameters, "calc", false);                     
93                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
94                         else { 
95                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
96                         }
97                         splitAtDash(calc, Estimators);
98                         
99                         groups = validParameter.validFile(parameters, "groups", false);                 
100                         if (groups == "not found") { groups = ""; }
101                         else { 
102                                 splitAtDash(groups, Groups);
103                         }
104                         globaldata->Groups = Groups;
105                         
106                         string temp;
107                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
108                         convert(temp, freq); 
109                         
110                         temp = validParameter.validFile(parameters, "all", false);                              if (temp == "not found") { temp = "false"; }
111                         all = isTrue(temp);
112                                                 
113                         if (abort == false) {
114                                 
115                                 if (outputDir == "") { outputDir += hasPath(globaldata->inputFileName); }
116                                 string fileNameRoot = outputDir + getRootName(getSimpleName(globaldata->inputFileName));
117                                 format = globaldata->getFormat();
118                                 int i;
119                                 
120                                 validCalculator = new ValidCalculators();
121                                 util = new SharedUtil();
122                                 
123                                 for (i=0; i<Estimators.size(); i++) {
124                                         if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) { 
125                                                 if (Estimators[i] == "sharedchao") { 
126                                                         cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
127                                                         outputNames.push_back(fileNameRoot+"shared.chao");
128                                                 }else if (Estimators[i] == "sharedsobs") { 
129                                                         cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
130                                                         outputNames.push_back(fileNameRoot+"shared.sobs");
131                                                 }else if (Estimators[i] == "sharedace") { 
132                                                         cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
133                                                         outputNames.push_back(fileNameRoot+"shared.ace");
134                                                 }else if (Estimators[i] == "jabund") {  
135                                                         cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
136                                                         outputNames.push_back(fileNameRoot+"jabund");
137                                                 }else if (Estimators[i] == "sorabund") { 
138                                                         cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
139                                                         outputNames.push_back(fileNameRoot+"sorabund");
140                                                 }else if (Estimators[i] == "jclass") { 
141                                                         cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
142                                                         outputNames.push_back(fileNameRoot+"jclass");
143                                                 }else if (Estimators[i] == "sorclass") { 
144                                                         cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
145                                                         outputNames.push_back(fileNameRoot+"sorclass");
146                                                 }else if (Estimators[i] == "jest") { 
147                                                         cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
148                                                         outputNames.push_back(fileNameRoot+"jest");
149                                                 }else if (Estimators[i] == "sorest") { 
150                                                         cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
151                                                         outputNames.push_back(fileNameRoot+"sorest");
152                                                 }else if (Estimators[i] == "thetayc") { 
153                                                         cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
154                                                         outputNames.push_back(fileNameRoot+"thetayc");
155                                                 }else if (Estimators[i] == "thetan") { 
156                                                         cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
157                                                         outputNames.push_back(fileNameRoot+"thetan");
158                                                 }else if (Estimators[i] == "kstest") { 
159                                                         cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
160                                                         outputNames.push_back(fileNameRoot+"kstest");
161                                                 }else if (Estimators[i] == "whittaker") { 
162                                                         cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
163                                                         outputNames.push_back(fileNameRoot+"whittaker");
164                                                 }else if (Estimators[i] == "sharednseqs") { 
165                                                         cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
166                                                         outputNames.push_back(fileNameRoot+"shared.nseqs");
167                                                 }else if (Estimators[i] == "ochiai") { 
168                                                         cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
169                                                         outputNames.push_back(fileNameRoot+"ochiai");
170                                                 }else if (Estimators[i] == "anderberg") { 
171                                                         cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
172                                                         outputNames.push_back(fileNameRoot+"anderberg");
173                                                 }else if (Estimators[i] == "skulczynski") { 
174                                                         cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
175                                                         outputNames.push_back(fileNameRoot+"kulczynski");
176                                                 }else if (Estimators[i] == "kulczynskicody") { 
177                                                         cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
178                                                         outputNames.push_back(fileNameRoot+"kulczynskicody");
179                                                 }else if (Estimators[i] == "lennon") { 
180                                                         cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
181                                                         outputNames.push_back(fileNameRoot+"lennon");
182                                                 }else if (Estimators[i] == "morisitahorn") { 
183                                                         cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
184                                                         outputNames.push_back(fileNameRoot+"morisitahorn");
185                                                 }else if (Estimators[i] == "braycurtis") { 
186                                                         cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
187                                                         outputNames.push_back(fileNameRoot+"braycurtis");
188                                                 }
189                                         }
190                                 }       
191                         }
192                 }
193
194         }
195         catch(exception& e) {
196                 m->errorOut(e, "CollectSharedCommand", "CollectSharedCommand");
197                 exit(1);
198         }
199 }
200 //**********************************************************************************************************************
201
202 void CollectSharedCommand::help(){
203         try {
204                 m->mothurOut("The collect.shared command can only be executed after a successful read.otu command.\n");
205                 m->mothurOut("The collect.shared command parameters are label, freq, calc and groups.  No parameters are required \n");
206                 m->mothurOut("The collect.shared command should be in the following format: \n");
207                 m->mothurOut("collect.shared(label=yourLabel, freq=yourFreq, calc=yourEstimators, groups=yourGroups).\n");
208                 m->mothurOut("Example collect.shared(label=unique-.01-.03, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
209                 m->mothurOut("The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan.\n");
210                 m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
211                 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
212                 validCalculator->printCalc("shared", cout);
213                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
214                 m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
215                 m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
216                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
217                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
218                 
219         }
220         catch(exception& e) {
221                 m->errorOut(e, "CollectSharedCommand", "help");
222                 exit(1);
223         }
224 }
225
226 //**********************************************************************************************************************
227
228 CollectSharedCommand::~CollectSharedCommand(){
229         if (abort == false) {
230                 delete input; globaldata->ginput = NULL;
231                 delete read;
232                 delete util;
233                 delete validCalculator;
234                 globaldata->gorder = NULL;
235         }
236 }
237
238 //**********************************************************************************************************************
239
240 int CollectSharedCommand::execute(){
241         try {
242                 
243                 if (abort == true) {    return 0;       }
244                 
245                 //if the users entered no valid calculators don't execute command
246                 if (cDisplays.size() == 0) { return 0; }
247                 for(int i=0;i<cDisplays.size();i++){    cDisplays[i]->setAll(all);      }       
248         
249                 read = new ReadOTUFile(globaldata->inputFileName);      
250                 read->read(&*globaldata); 
251                 
252                 input = globaldata->ginput;
253                 order = input->getSharedOrderVector();
254                 string lastLabel = order->getLabel();
255                 
256                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
257                 set<string> processedLabels;
258                 set<string> userLabels = labels;
259                         
260                 //set users groups
261                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
262                 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
263
264                 while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
265                         if (m->control_pressed) { 
266                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
267                                         for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
268                                         delete order; 
269                                         globaldata->Groups.clear();
270                                         return 0;
271                         }
272
273                         if(allLines == 1 || labels.count(order->getLabel()) == 1){
274                         
275                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
276                                 //create collectors curve
277                                 cCurve = new Collect(order, cDisplays);
278                                 cCurve->getSharedCurve(freq);
279                                 delete cCurve;
280                         
281                                 processedLabels.insert(order->getLabel());
282                                 userLabels.erase(order->getLabel());
283                         }
284                         
285                         //you have a label the user want that is smaller than this label and the last label has not already been processed
286                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
287                                 string saveLabel = order->getLabel();
288                                 
289                                 delete order;
290                                 order = input->getSharedOrderVector(lastLabel);
291                                 
292                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
293                                 //create collectors curve
294                                 cCurve = new Collect(order, cDisplays);
295                                 cCurve->getSharedCurve(freq);
296                                 delete cCurve;
297                                 
298                                 processedLabels.insert(order->getLabel());
299                                 userLabels.erase(order->getLabel());
300                                 
301                                 //restore real lastlabel to save below
302                                 order->setLabel(saveLabel);
303                         }
304                         
305                         
306                         lastLabel = order->getLabel();                  
307                         
308                         //get next line to process
309                         delete order;
310                         order = input->getSharedOrderVector();
311                 }
312                 
313                 if (m->control_pressed) { 
314                                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
315                                         for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
316                                         globaldata->Groups.clear();
317                                         return 0;
318                 }
319                 
320                 //output error messages about any remaining user labels
321                 set<string>::iterator it;
322                 bool needToRun = false;
323                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
324                         m->mothurOut("Your file does not include the label " + *it); 
325                         if (processedLabels.count(lastLabel) != 1) {
326                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
327                                 needToRun = true;
328                         }else {
329                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
330                         }
331                 }
332                 
333                 //run last label if you need to
334                 if (needToRun == true)  {
335                         if (order != NULL) {  delete order;  }
336                         order = input->getSharedOrderVector(lastLabel);
337                         
338                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
339                         cCurve = new Collect(order, cDisplays);
340                         cCurve->getSharedCurve(freq);
341                         delete cCurve;
342                         
343                         if (m->control_pressed) { 
344                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());         }  
345                                 for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }
346                                 delete order; 
347                                 globaldata->Groups.clear();
348                                 return 0;
349                         }
350
351                         delete order;
352                 }
353                 
354                 for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }       
355                 
356                 //reset groups parameter
357                 globaldata->Groups.clear(); 
358                 
359                 m->mothurOutEndLine();
360                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
361                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
362                 m->mothurOutEndLine();
363
364                 
365                 return 0;
366         }
367         catch(exception& e) {
368                 m->errorOut(e, "CollectSharedCommand", "execute");
369                 exit(1);
370         }
371 }
372
373 /***********************************************************/