]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
added a few evenness calculators and fixed a couple of bugs in filter.seqs and pre...
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27 //**********************************************************************************************************************
28
29
30 RareFactCommand::RareFactCommand(string option)  {
31         try {
32                 globaldata = GlobalData::getInstance();
33                 abort = false;
34                 allLines = 1;
35                 labels.clear();
36                 Estimators.clear();
37                                 
38                 //allow user to run help
39                 if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
40                 
41                 else {
42                         //valid paramters for this command
43                         string Array[] =  {"iters","freq","label","calc","abund","outputdir","inputdir"};
44                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                         
46                         OptionParser parser(option);
47                         map<string,string> parameters = parser.getParameters();
48                         
49                         ValidParameters validParameter;
50                 
51                         //check to make sure all parameters are valid for command
52                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
53                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
54                         }
55                         
56                         //if the user changes the output directory command factory will send this info to us in the output parameter 
57                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
58                                 outputDir = ""; 
59                                 outputDir += hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it  
60                         }
61
62                         //make sure the user has already run the read.otu command
63                         if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the rarefact.single command."); m->mothurOutEndLine(); abort = true; }
64                         
65                         //check for optional parameter and set defaults
66                         // ...at some point should added some additional type checking...
67                         label = validParameter.validFile(parameters, "label", false);                   
68                         if (label == "not found") { label = ""; }
69                         else { 
70                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
71                                 else { allLines = 1;  }
72                         }
73                         
74                         //if the user has not specified any labels use the ones from read.otu
75                         if(label == "") {  
76                                 allLines = globaldata->allLines; 
77                                 labels = globaldata->labels; 
78                         }
79                                 
80                         calc = validParameter.validFile(parameters, "calc", false);                     
81                         if (calc == "not found") { calc = "sobs";  }
82                         else { 
83                                  if (calc == "default")  {  calc = "sobs";  }
84                         }
85                         splitAtDash(calc, Estimators);
86
87                         string temp;
88                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
89                         convert(temp, freq); 
90                         
91                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
92                         convert(temp, abund); 
93                         
94                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
95                         convert(temp, nIters); 
96                 }
97                 
98         }
99         catch(exception& e) {
100                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
101                 exit(1);
102         }
103 }
104 //**********************************************************************************************************************
105
106 void RareFactCommand::help(){
107         try {
108                 m->mothurOut("The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
109                 m->mothurOut("The rarefaction.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster.\n");
110                 m->mothurOut("The rarefaction.single command parameters are label, iters, freq, calc and abund.  No parameters are required. \n");
111                 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
112                 m->mothurOut("The rarefaction.single command should be in the following format: \n");
113                 m->mothurOut("rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n");
114                 m->mothurOut("Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n");
115                 m->mothurOut("The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n");
116                 validCalculator->printCalc("rarefaction", cout);
117                 m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
118                 m->mothurOut("Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n\n");
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "RareFactCommand", "help");
122                 exit(1);
123         }
124 }
125
126 //**********************************************************************************************************************
127
128 RareFactCommand::~RareFactCommand(){}
129
130 //**********************************************************************************************************************
131
132 int RareFactCommand::execute(){
133         try {
134         
135                 if (abort == true) { return 0; }
136                 
137                 vector<string> outputNames;
138                 
139                 string hadShared = "";
140                 if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName);  }
141                 else { hadShared = globaldata->getSharedFile(); inputFileNames = parseSharedFile(globaldata->getSharedFile());  globaldata->setFormat("rabund");  }
142                                 
143                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } return 0; }
144                 
145                 for (int p = 0; p < inputFileNames.size(); p++) {
146                         
147                         string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p]));
148                         globaldata->inputFileName = inputFileNames[p];
149                         
150                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } return 0; }
151                         
152                         if (inputFileNames.size() > 1) {
153                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
154                         }
155                         int i;
156                         validCalculator = new ValidCalculators();
157                         
158                         
159                         for (i=0; i<Estimators.size(); i++) {
160                                 if (validCalculator->isValidCalculator("rarefaction", Estimators[i]) == true) { 
161                                         if (Estimators[i] == "sobs") { 
162                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
163                                                 outputNames.push_back(fileNameRoot+"rarefaction");
164                                         }else if (Estimators[i] == "chao") { 
165                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
166                                                 outputNames.push_back(fileNameRoot+"r_chao");
167                                         }else if (Estimators[i] == "ace") { 
168                                                 if(abund < 5)
169                                                         abund = 10;
170                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
171                                                 outputNames.push_back(fileNameRoot+"r_ace");
172                                         }else if (Estimators[i] == "jack") { 
173                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
174                                                 outputNames.push_back(fileNameRoot+"r_jack");
175                                         }else if (Estimators[i] == "shannon") { 
176                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
177                                                 outputNames.push_back(fileNameRoot+"r_shannon");
178                                         }else if (Estimators[i] == "shannoneven") { 
179                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
180                                                 outputNames.push_back(fileNameRoot+"r_shannoneven");
181                                         }else if (Estimators[i] == "heip") { 
182                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
183                                                 outputNames.push_back(fileNameRoot+"r_heip");
184                                         }else if (Estimators[i] == "smithwilson") { 
185                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
186                                                 outputNames.push_back(fileNameRoot+"r_smithwilson");
187                                         }else if (Estimators[i] == "npshannon") { 
188                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
189                                                 outputNames.push_back(fileNameRoot+"r_npshannon");
190                                         }else if (Estimators[i] == "simpson") { 
191                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
192                                                 outputNames.push_back(fileNameRoot+"r_simpson");
193                                         }else if (Estimators[i] == "simpsoneven") { 
194                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
195                                                 outputNames.push_back(fileNameRoot+"r_simpsoneven");
196                                         }else if (Estimators[i] == "invsimpson") { 
197                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
198                                                 outputNames.push_back(fileNameRoot+"r_invsimpson");
199                                         }else if (Estimators[i] == "bootstrap") { 
200                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
201                                                 outputNames.push_back(fileNameRoot+"r_bootstrap");
202                                         }else if (Estimators[i] == "coverage") { 
203                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
204                                                 outputNames.push_back(fileNameRoot+"r_coverage");
205                                         }else if (Estimators[i] == "nseqs") { 
206                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
207                                                 outputNames.push_back(fileNameRoot+"r_nseqs");
208                                         }
209                                 }
210                         }
211                         
212                         
213                         //if the users entered no valid calculators don't execute command
214                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    } delete validCalculator; return 0; }
215                         
216                         read = new ReadOTUFile(globaldata->inputFileName);      
217                         read->read(&*globaldata); 
218                         
219                         order = globaldata->gorder;
220                         string lastLabel = order->getLabel();
221                         input = globaldata->ginput;
222                         
223                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
224                         set<string> processedLabels;
225                         set<string> userLabels = labels;
226                         
227                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
228                         
229                         //as long as you are not at the end of the file or done wih the lines you want
230                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
231                                 
232                                 if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; delete order; globaldata->gorder = NULL; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
233
234                                 
235                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
236                                         
237                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
238                                         rCurve = new Rarefact(order, rDisplays);
239                                         rCurve->getCurve(freq, nIters);
240                                         delete rCurve;
241                                         
242                                         processedLabels.insert(order->getLabel());
243                                         userLabels.erase(order->getLabel());
244                                 }
245                                 
246                                 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
247                                         string saveLabel = order->getLabel();
248                                         
249                                         delete order;
250                                         order = (input->getOrderVector(lastLabel));
251                                         
252                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
253                                         rCurve = new Rarefact(order, rDisplays);
254                                         rCurve->getCurve(freq, nIters);
255                                         delete rCurve;
256                                         
257                                         processedLabels.insert(order->getLabel());
258                                         userLabels.erase(order->getLabel());
259                                         
260                                         //restore real lastlabel to save below
261                                         order->setLabel(saveLabel);
262                                 }
263                                 
264                                 lastLabel = order->getLabel();          
265                                 
266                                 delete order;
267                                 order = (input->getOrderVector());
268                         }
269                         
270                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    } delete validCalculator; delete read; delete input; globaldata->ginput = NULL; for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }  return 0; }
271
272                         //output error messages about any remaining user labels
273                         set<string>::iterator it;
274                         bool needToRun = false;
275                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
276                                 m->mothurOut("Your file does not include the label " + *it);
277                                 if (processedLabels.count(lastLabel) != 1) {
278                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
279                                         needToRun = true;
280                                 }else {
281                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
282                                 }
283                         }
284                         
285                         if (m->control_pressed) { if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  } for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    } delete validCalculator; delete read; delete input; globaldata->ginput = NULL;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
286
287                         //run last label if you need to
288                         if (needToRun == true)  {
289                                 if (order != NULL) {    delete order;   }
290                                 order = (input->getOrderVector(lastLabel));
291                                 
292                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
293                                 rCurve = new Rarefact(order, rDisplays);
294                                 rCurve->getCurve(freq, nIters);
295                                 delete rCurve;
296                                 
297                                 delete order;
298                         }
299                         
300                         
301                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
302                         rDisplays.clear();
303                         globaldata->gorder = NULL;
304                         delete input;  globaldata->ginput = NULL;
305                         delete read;
306                         delete validCalculator;
307                         
308                 }
309                 
310                 if (hadShared != "") {  globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile");  }
311                 
312                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
313
314                 m->mothurOutEndLine();
315                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
316                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
317                 m->mothurOutEndLine();
318
319                 return 0;
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "RareFactCommand", "execute");
323                 exit(1);
324         }
325 }
326 //**********************************************************************************************************************
327 vector<string> RareFactCommand::parseSharedFile(string filename) {
328         try {
329                 vector<string> filenames;
330                 
331                 map<string, ofstream*> filehandles;
332                 map<string, ofstream*>::iterator it3;
333                 
334                                 
335                 //read first line
336                 read = new ReadOTUFile(filename);       
337                 read->read(&*globaldata); 
338                         
339                 input = globaldata->ginput;
340                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
341                 
342                 string sharedFileRoot = getRootName(filename);
343                 
344                 //clears file before we start to write to it below
345                 for (int i=0; i<lookup.size(); i++) {
346                         remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
347                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
348                 }
349                 
350                 ofstream* temp;
351                 for (int i=0; i<lookup.size(); i++) {
352                         temp = new ofstream;
353                         filehandles[lookup[i]->getGroup()] = temp;
354                         groups.push_back(lookup[i]->getGroup());
355                 }
356
357                 while(lookup[0] != NULL) {
358                 
359                         for (int i = 0; i < lookup.size(); i++) {
360                                 RAbundVector rav = lookup[i]->getRAbundVector();
361                                 openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
362                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
363                                 (*(filehandles[lookup[i]->getGroup()])).close();
364                         }
365                 
366                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
367                         lookup = input->getSharedRAbundVectors();
368                 }
369                 
370                 //free memory
371                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
372                         delete it3->second;
373                 }
374                 delete read;
375                 delete input;
376                 globaldata->ginput = NULL;
377
378                 return filenames;
379         }
380         catch(exception& e) {
381                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
382                 exit(1);
383         }
384 }
385 //**********************************************************************************************************************
386
387
388