]> git.donarmstrong.com Git - mothur.git/blob - collectsharedcommand.cpp
merged pat's trim seqs edits with sarah's major overhaul of global data; also added...
[mothur.git] / collectsharedcommand.cpp
1 /*
2  *  collectsharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "collectsharedcommand.h"
11 #include "sharedsobscollectsummary.h"
12 #include "sharedchao1.h"
13 #include "sharedace.h"
14 #include "sharedjabund.h"
15 #include "sharedsorabund.h"
16 #include "sharedjclass.h"
17 #include "sharedsorclass.h"
18 #include "sharedjest.h"
19 #include "sharedsorest.h"
20 #include "sharedthetayc.h"
21 #include "sharedthetan.h"
22 #include "sharedkstest.h"
23 #include "whittaker.h"
24 #include "sharednseqs.h"
25 #include "sharedochiai.h"
26 #include "sharedanderbergs.h"
27 #include "sharedkulczynski.h"
28 #include "sharedkulczynskicody.h"
29 #include "sharedlennon.h"
30 #include "sharedmorisitahorn.h"
31 #include "sharedbraycurtis.h"
32 #include "sharedjackknife.h"
33 #include "whittaker.h"
34
35
36
37 //**********************************************************************************************************************
38
39 CollectSharedCommand::CollectSharedCommand(string option){
40         try {
41                 globaldata = GlobalData::getInstance();
42                 abort = false;
43                 allLines = 1;
44                 lines.clear();
45                 labels.clear();
46                 Estimators.clear();
47                 
48                 //allow user to run help
49                 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
50                 
51                 else {
52                         //valid paramters for this command
53                         string Array[] =  {"freq","line","label","calc","groups"};
54                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                         
56                         OptionParser parser(option);
57                         map<string,string> parameters=parser.getParameters();
58                         
59                         ValidParameters validParameter;
60                 
61                         //check to make sure all parameters are valid for command
62                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
63                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
64                         }
65                         
66                         //make sure the user has already run the read.otu command
67                         if (globaldata->getSharedFile() == "") {
68                                 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; }
69                                 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the collect.shared command." << endl; abort = true; }
70                         }
71
72                         
73                         //check for optional parameter and set defaults
74                         // ...at some point should added some additional type checking...
75                         line = validParameter.validFile(parameters, "line", false);                             
76                         if (line == "not found") { line = "";  }
77                         else { 
78                                 if(line != "all") {  splitAtDash(line, lines);  allLines = 0;  }
79                                 else { allLines = 1;  }
80                         }
81                         
82                         label = validParameter.validFile(parameters, "label", false);                   
83                         if (label == "not found") { label = ""; }
84                         else { 
85                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
86                                 else { allLines = 1;  }
87                         }
88                         
89                         //make sure user did not use both the line and label parameters
90                         if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
91                         //if the user has not specified any line or labels use the ones from read.otu
92                         else if((line == "") && (label == "")) {  
93                                 allLines = globaldata->allLines; 
94                                 labels = globaldata->labels; 
95                                 lines = globaldata->lines;
96                         }
97                                 
98                         calc = validParameter.validFile(parameters, "calc", false);                     
99                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
100                         else { 
101                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
102                         }
103                         splitAtDash(calc, Estimators);
104                         
105                         groups = validParameter.validFile(parameters, "groups", false);                 
106                         if (groups == "not found") { groups = ""; }
107                         else { 
108                                 splitAtDash(groups, Groups);
109                                 globaldata->Groups = Groups;
110                         }
111                         
112                         string temp;
113                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
114                         convert(temp, freq); 
115                                                 
116                         if (abort == false) {
117                         
118                                 string fileNameRoot = getRootName(globaldata->inputFileName);
119 //                              format = globaldata->getFormat();
120                                 int i;
121                                 
122                                 validCalculator = new ValidCalculators();
123                                 util = new SharedUtil();
124                                 
125                                 for (i=0; i<Estimators.size(); i++) {
126                                         if (validCalculator->isValidCalculator("shared", Estimators[i]) == true) { 
127                                                 if (Estimators[i] == "sharedchao") { 
128                                                         cDisplays.push_back(new CollectDisplay(new SharedChao1(), new SharedOneColumnFile(fileNameRoot+"shared.chao")));
129                                                 }else if (Estimators[i] == "sharedsobs") { 
130                                                         cDisplays.push_back(new CollectDisplay(new SharedSobsCS(), new SharedOneColumnFile(fileNameRoot+"shared.sobs")));
131                                                 }else if (Estimators[i] == "sharedace") { 
132                                                         cDisplays.push_back(new CollectDisplay(new SharedAce(), new SharedOneColumnFile(fileNameRoot+"shared.ace")));
133                                                 }else if (Estimators[i] == "jabund") {  
134                                                         cDisplays.push_back(new CollectDisplay(new JAbund(), new SharedOneColumnFile(fileNameRoot+"jabund")));
135                                                 }else if (Estimators[i] == "sorabund") { 
136                                                         cDisplays.push_back(new CollectDisplay(new SorAbund(), new SharedOneColumnFile(fileNameRoot+"sorabund")));
137                                                 }else if (Estimators[i] == "jclass") { 
138                                                         cDisplays.push_back(new CollectDisplay(new Jclass(), new SharedOneColumnFile(fileNameRoot+"jclass")));
139                                                 }else if (Estimators[i] == "sorclass") { 
140                                                         cDisplays.push_back(new CollectDisplay(new SorClass(), new SharedOneColumnFile(fileNameRoot+"sorclass")));
141                                                 }else if (Estimators[i] == "jest") { 
142                                                         cDisplays.push_back(new CollectDisplay(new Jest(), new SharedOneColumnFile(fileNameRoot+"jest")));
143                                                 }else if (Estimators[i] == "sorest") { 
144                                                         cDisplays.push_back(new CollectDisplay(new SorEst(), new SharedOneColumnFile(fileNameRoot+"sorest")));
145                                                 }else if (Estimators[i] == "thetayc") { 
146                                                         cDisplays.push_back(new CollectDisplay(new ThetaYC(), new SharedOneColumnFile(fileNameRoot+"thetayc")));
147                                                 }else if (Estimators[i] == "thetan") { 
148                                                         cDisplays.push_back(new CollectDisplay(new ThetaN(), new SharedOneColumnFile(fileNameRoot+"thetan")));
149                                                 }else if (Estimators[i] == "kstest") { 
150                                                         cDisplays.push_back(new CollectDisplay(new KSTest(), new SharedOneColumnFile(fileNameRoot+"kstest")));
151                                                 }else if (Estimators[i] == "whittaker") { 
152                                                         cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker")));
153                                                 }else if (Estimators[i] == "sharednseqs") { 
154                                                         cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs")));
155                                                 }else if (Estimators[i] == "ochiai") { 
156                                                         cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai")));
157                                                 }else if (Estimators[i] == "anderberg") { 
158                                                         cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
159                                                 }else if (Estimators[i] == "skulczynski") { 
160                                                         cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
161                                                 }else if (Estimators[i] == "kulczynskicody") { 
162                                                         cDisplays.push_back(new CollectDisplay(new KulczynskiCody(), new SharedOneColumnFile(fileNameRoot+"kulczynskicody")));
163                                                 }else if (Estimators[i] == "lennon") { 
164                                                         cDisplays.push_back(new CollectDisplay(new Lennon(), new SharedOneColumnFile(fileNameRoot+"lennon")));
165                                                 }else if (Estimators[i] == "morisitahorn") { 
166                                                         cDisplays.push_back(new CollectDisplay(new MorHorn(), new SharedOneColumnFile(fileNameRoot+"morisitahorn")));
167                                                 }else if (Estimators[i] == "braycurtis") { 
168                                                         cDisplays.push_back(new CollectDisplay(new BrayCurtis(), new SharedOneColumnFile(fileNameRoot+"braycurtis")));
169                                                 }
170                                         }
171                                 }       
172                         }
173                 }
174
175         }
176         catch(exception& e) {
177                 cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function CollectSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180         catch(...) {
181                 cout << "An unknown error has occurred in the CollectSharedCommand class function CollectSharedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182                 exit(1);
183         }       
184                         
185 }
186 //**********************************************************************************************************************
187
188 void CollectSharedCommand::help(){
189         try {
190                 cout << "The collect.shared command can only be executed after a successful read.otu command." << "\n";
191                 cout << "The collect.shared command parameters are label, line, freq, calc and groups.  No parameters are required, but you may not use " << "\n";
192                 cout << "both the line and label parameters at the same time. The collect.shared command should be in the following format: " << "\n";
193                 cout << "collect.shared(label=yourLabel, line=yourLines, freq=yourFreq, calc=yourEstimators, groups=yourGroups)." << "\n";
194                 cout << "Example collect.shared(label=unique-.01-.03, line=0-5-10, freq=10, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan)." << "\n";
195                 cout << "The default values for freq is 100 and calc are sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan." << "\n";
196                 cout << "The default value for groups is all the groups in your groupfile." << "\n";
197                 validCalculator->printCalc("shared", cout);
198                 cout << "The label and line parameters are used to analyze specific lines in your input." << "\n";
199                 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups." << "\n";
200                 cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
201                 
202         }
203         catch(exception& e) {
204                 cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205                 exit(1);
206         }
207         catch(...) {
208                 cout << "An unknown error has occurred in the CollectSharedCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
209                 exit(1);
210         }       
211 }
212
213 //**********************************************************************************************************************
214
215 CollectSharedCommand::~CollectSharedCommand(){
216         delete order;
217         delete input;
218         delete cCurve;
219         delete read;
220         delete util;
221         delete validCalculator;
222 }
223
224 //**********************************************************************************************************************
225
226 int CollectSharedCommand::execute(){
227         try {
228                 
229                 if (abort == true) {    return 0;       }
230                 
231                 int count = 1;
232                 
233                 //if the users entered no valid calculators don't execute command
234                 if (cDisplays.size() == 0) { return 0; }
235                 
236                 read = new ReadOTUFile(globaldata->inputFileName);      
237                 read->read(&*globaldata); 
238                         
239                 input = globaldata->ginput;
240                 order = input->getSharedOrderVector();
241                 SharedOrderVector* lastOrder = order;
242                 
243                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
244                 set<string> processedLabels;
245                 set<string> userLabels = globaldata->labels;
246                 set<int> userLines = globaldata->lines;
247                                 
248                 //set users groups
249                 util->setGroups(globaldata->Groups, globaldata->gGroupmap->namesOfGroups, "collect");
250                 util->updateGroupIndex(globaldata->Groups, globaldata->gGroupmap->groupIndex);
251
252                 while((order != NULL) && ((globaldata->allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
253
254                         if(globaldata->allLines == 1 || globaldata->lines.count(count) == 1 || globaldata->labels.count(order->getLabel()) == 1){
255                                 
256                                 //create collectors curve
257                                 cCurve = new Collect(order, cDisplays);
258                                 cCurve->getSharedCurve(freq);
259                                 delete cCurve;
260                         
261                                 cout << order->getLabel() << '\t' << count << endl;
262                                 processedLabels.insert(order->getLabel());
263                                 userLabels.erase(order->getLabel());
264                                 userLines.erase(count);
265
266                         //you have a label the user want that is smaller than this line and the last line has not already been processed 
267                         }
268                         
269                         if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastOrder->getLabel()) != 1)) {
270                                 //create collectors curve
271                                 cCurve = new Collect(lastOrder, cDisplays);
272                                 cCurve->getSharedCurve(freq);
273                                 delete cCurve;
274                         
275                                 cout << lastOrder->getLabel() << '\t' << count << endl;
276                                 processedLabels.insert(lastOrder->getLabel());
277                                 userLabels.erase(lastOrder->getLabel());
278                         }
279                         
280                         if (count != 1) { delete lastOrder; }
281                         lastOrder = order;                      
282                         
283                         //get next line to process
284                         order = input->getSharedOrderVector();
285                         count++;
286                 }
287                 
288                 //output error messages about any remaining user labels
289                 set<string>::iterator it;
290                 bool needToRun = false;
291                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
292                         cout << "Your file does not include the label "<< *it; 
293                         if (processedLabels.count(lastOrder->getLabel()) != 1) {
294                                 cout << ". I will use " << lastOrder->getLabel() << "." << endl;
295                                 needToRun = true;
296                         }else {
297                                 cout << ". Please refer to " << lastOrder->getLabel() << "." << endl;
298                         }
299                 }
300                 
301                 //run last line if you need to
302                 if (needToRun == true)  {
303                         cCurve = new Collect(lastOrder, cDisplays);
304                         cCurve->getCurve(freq);
305                         delete cCurve;
306                         
307                         cout << lastOrder->getLabel() << '\t' << count << endl;
308                 }
309                 
310                 delete lastOrder;
311                 for(int i=0;i<cDisplays.size();i++){    delete cDisplays[i];    }       
312                 
313                 //reset groups parameter
314                 globaldata->Groups.clear(); 
315                 
316                 return 0;
317         }
318         catch(exception& e) {
319                 cout << "Standard Error: " << e.what() << " has occurred in the CollectSharedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
320                 exit(1);
321         }
322         catch(...) {
323                 cout << "An unknown error has occurred in the CollectSharedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
324                 exit(1);
325         }       
326 }
327
328 /***********************************************************/
329