]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
worked on hcluster. made .single command run using a sharedfile. and various other...
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "npshannon.h"
18 #include "shannon.h"
19 #include "jackknife.h"
20 #include "coverage.h"
21
22 //**********************************************************************************************************************
23
24
25 RareFactCommand::RareFactCommand(string option){
26         try {
27                 globaldata = GlobalData::getInstance();
28                 abort = false;
29                 allLines = 1;
30                 labels.clear();
31                 Estimators.clear();
32                 
33                 //allow user to run help
34                 if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
35                 
36                 else {
37                         //valid paramters for this command
38                         string Array[] =  {"iters","freq","label","calc","abund"};
39                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
40                         
41                         OptionParser parser(option);
42                         map<string,string> parameters = parser.getParameters();
43                         
44                         ValidParameters validParameter;
45                 
46                         //check to make sure all parameters are valid for command
47                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
48                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
49                         }
50                         
51                         //make sure the user has already run the read.otu command
52                         if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund, rabund or shared file before you can use the rarefact.single command."); mothurOutEndLine(); abort = true; }
53                         
54                         //check for optional parameter and set defaults
55                         // ...at some point should added some additional type checking...
56                         label = validParameter.validFile(parameters, "label", false);                   
57                         if (label == "not found") { label = ""; }
58                         else { 
59                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
60                                 else { allLines = 1;  }
61                         }
62                         
63                         //if the user has not specified any labels use the ones from read.otu
64                         if(label == "") {  
65                                 allLines = globaldata->allLines; 
66                                 labels = globaldata->labels; 
67                         }
68                                 
69                         calc = validParameter.validFile(parameters, "calc", false);                     
70                         if (calc == "not found") { calc = "sobs";  }
71                         else { 
72                                  if (calc == "default")  {  calc = "sobs";  }
73                         }
74                         splitAtDash(calc, Estimators);
75
76                         string temp;
77                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
78                         convert(temp, freq); 
79                         
80                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
81                         convert(temp, abund); 
82                         
83                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
84                         convert(temp, nIters); 
85                 }
86                 
87         }
88         catch(exception& e) {
89                 errorOut(e, "RareFactCommand", "RareFactCommand");
90                 exit(1);
91         }
92 }
93 //**********************************************************************************************************************
94
95 void RareFactCommand::help(){
96         try {
97                 mothurOut("The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
98                 mothurOut("The rarefaction.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster.\n");
99                 mothurOut("The rarefaction.single command parameters are label, iters, freq, calc and abund.  No parameters are required. \n");
100                 mothurOut("The rarefaction.single command should be in the following format: \n");
101                 mothurOut("rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n");
102                 mothurOut("Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n");
103                 mothurOut("The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n");
104                 validCalculator->printCalc("rarefaction", cout);
105                 mothurOut("The label parameter is used to analyze specific labels in your input.\n");
106                 mothurOut("Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n\n");
107         }
108         catch(exception& e) {
109                 errorOut(e, "RareFactCommand", "help");
110                 exit(1);
111         }
112 }
113
114 //**********************************************************************************************************************
115
116 RareFactCommand::~RareFactCommand(){}
117
118 //**********************************************************************************************************************
119
120 int RareFactCommand::execute(){
121         try {
122         
123                 if (abort == true) { return 0; }
124                 
125                 if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName);  }
126                 else {  inputFileNames = parseSharedFile(globaldata->getSharedFile());  globaldata->setFormat("rabund");  }
127                 
128                 for (int p = 0; p < inputFileNames.size(); p++) {
129                         
130                         string fileNameRoot = getRootName(inputFileNames[p]);
131                         globaldata->inputFileName = inputFileNames[p];
132                         
133                         if (inputFileNames.size() > 1) {
134                                 mothurOutEndLine(); mothurOut("Processing group " + groups[p]); mothurOutEndLine(); mothurOutEndLine();
135                         }
136                         int i;
137                         validCalculator = new ValidCalculators();
138                         
139                         
140                         for (i=0; i<Estimators.size(); i++) {
141                                 if (validCalculator->isValidCalculator("rarefaction", Estimators[i]) == true) { 
142                                         if (Estimators[i] == "sobs") { 
143                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
144                                         }else if (Estimators[i] == "chao") { 
145                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
146                                         }else if (Estimators[i] == "ace") { 
147                                                 if(abund < 5)
148                                                         abund = 10;
149                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
150                                         }else if (Estimators[i] == "jack") { 
151                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
152                                         }else if (Estimators[i] == "shannon") { 
153                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
154                                         }else if (Estimators[i] == "npshannon") { 
155                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
156                                         }else if (Estimators[i] == "simpson") { 
157                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
158                                         }else if (Estimators[i] == "bootstrap") { 
159                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
160                                         }else if (Estimators[i] == "coverage") { 
161                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
162                                         }else if (Estimators[i] == "nseqs") { 
163                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
164                                         }
165                                 }
166                         }
167                         
168                         
169                         //if the users entered no valid calculators don't execute command
170                         if (rDisplays.size() == 0) { return 0; }
171                         
172                         read = new ReadOTUFile(globaldata->inputFileName);      
173                         read->read(&*globaldata); 
174                         
175                         order = globaldata->gorder;
176                         string lastLabel = order->getLabel();
177                         input = globaldata->ginput;
178                         
179                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
180                         set<string> processedLabels;
181                         set<string> userLabels = labels;
182                         
183                         //as long as you are not at the end of the file or done wih the lines you want
184                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
185                                 
186                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
187                                         
188                                         rCurve = new Rarefact(order, rDisplays);
189                                         rCurve->getCurve(freq, nIters);
190                                         delete rCurve;
191                                         
192                                         mothurOut(order->getLabel()); mothurOutEndLine();
193                                         processedLabels.insert(order->getLabel());
194                                         userLabels.erase(order->getLabel());
195                                 }
196                                 
197                                 if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
198                                         string saveLabel = order->getLabel();
199                                         
200                                         delete order;
201                                         order = (input->getOrderVector(lastLabel));
202                                         
203                                         rCurve = new Rarefact(order, rDisplays);
204                                         rCurve->getCurve(freq, nIters);
205                                         delete rCurve;
206                                         
207                                         mothurOut(order->getLabel()); mothurOutEndLine();
208                                         processedLabels.insert(order->getLabel());
209                                         userLabels.erase(order->getLabel());
210                                         
211                                         //restore real lastlabel to save below
212                                         order->setLabel(saveLabel);
213                                 }
214                                 
215                                 lastLabel = order->getLabel();          
216                                 
217                                 delete order;
218                                 order = (input->getOrderVector());
219                         }
220                         
221                         //output error messages about any remaining user labels
222                         set<string>::iterator it;
223                         bool needToRun = false;
224                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
225                                 mothurOut("Your file does not include the label " + *it);
226                                 if (processedLabels.count(lastLabel) != 1) {
227                                         mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
228                                         needToRun = true;
229                                 }else {
230                                         mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
231                                 }
232                         }
233                         
234                         //run last label if you need to
235                         if (needToRun == true)  {
236                                 if (order != NULL) {    delete order;   }
237                                 order = (input->getOrderVector(lastLabel));
238                                 
239                                 rCurve = new Rarefact(order, rDisplays);
240                                 rCurve->getCurve(freq, nIters);
241                                 delete rCurve;
242                                 
243                                 mothurOut(order->getLabel()); mothurOutEndLine();
244                                 delete order;
245                         }
246                         
247                         
248                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
249                         rDisplays.clear();
250                         globaldata->gorder = NULL;
251                         delete input;  globaldata->ginput = NULL;
252                         delete read;
253                         delete validCalculator;
254                         
255                 }
256                 
257                 return 0;
258         }
259         catch(exception& e) {
260                 errorOut(e, "RareFactCommand", "execute");
261                 exit(1);
262         }
263 }
264 //**********************************************************************************************************************
265 vector<string> RareFactCommand::parseSharedFile(string filename) {
266         try {
267                 vector<string> filenames;
268                 
269                 map<string, ofstream*> filehandles;
270                 map<string, ofstream*>::iterator it3;
271                 
272                                 
273                 //read first line
274                 read = new ReadOTUFile(filename);       
275                 read->read(&*globaldata); 
276                         
277                 input = globaldata->ginput;
278                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
279                 
280                 string sharedFileRoot = getRootName(filename);
281                 
282                 //clears file before we start to write to it below
283                 for (int i=0; i<lookup.size(); i++) {
284                         remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
285                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
286                 }
287                 
288                 ofstream* temp;
289                 for (int i=0; i<lookup.size(); i++) {
290                         temp = new ofstream;
291                         filehandles[lookup[i]->getGroup()] = temp;
292                         groups.push_back(lookup[i]->getGroup());
293                 }
294
295                 while(lookup[0] != NULL) {
296                 
297                         for (int i = 0; i < lookup.size(); i++) {
298                                 RAbundVector rav = lookup[i]->getRAbundVector();
299                                 openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
300                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
301                                 (*(filehandles[lookup[i]->getGroup()])).close();
302                         }
303                 
304                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
305                         lookup = input->getSharedRAbundVectors();
306                 }
307                 
308                 //free memory
309                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
310                         delete it3->second;
311                 }
312                 delete read;
313                 delete input;
314                 globaldata->ginput = NULL;
315
316                 return filenames;
317         }
318         catch(exception& e) {
319                 errorOut(e, "RareFactCommand", "parseSharedFile");
320                 exit(1);
321         }
322 }
323 //**********************************************************************************************************************
324
325
326