]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
fixes while testing 1.33.0
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27
28 //**********************************************************************************************************************
29 vector<string> RareFactCommand::setParameters(){        
30         try {
31                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(plist);
32                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(prabund);
33                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false); parameters.push_back(psabund);
34                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","",false,false,true); parameters.push_back(pshared);
35                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
36                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pfreq);
37                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
38                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "","",true,false,true); parameters.push_back(pcalc);
39                 CommandParameter pabund("abund", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pabund);
40                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
41                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pgroupmode);
42                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
43                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
44                 
45                 vector<string> myArray;
46                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
47                 return myArray;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "RareFactCommand", "setParameters");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string RareFactCommand::getHelpString(){        
56         try {
57                 ValidCalculators validCalculator;
58                 string helpString = "";
59                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors, groupmode and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
60                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
61                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
62                 helpString += "The rarefaction.single command should be in the following format: \n";
63                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
64                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
65                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
66                 validCalculator.printCalc("rarefaction");
67                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
68                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
69                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
70                 return helpString;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RareFactCommand", "getHelpString");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 string RareFactCommand::getOutputPattern(string type) {
79     try {
80         string pattern = "";
81         if (type == "rarefaction") {  pattern =  "[filename],rarefaction"; }
82         else if (type == "r_chao") {  pattern =  "[filename],r_chao"; }
83         else if (type == "r_ace") {  pattern =  "[filename],r_ace"; }
84         else if (type == "r_jack") {  pattern =  "[filename],r_jack"; }
85         else if (type == "r_shannon") {  pattern =  "[filename],r_shannon"; }
86         else if (type == "r_shannoneven") {  pattern =  "[filename],r_shannoneven"; }
87         else if (type == "r_smithwilson") {  pattern =  "[filename],r_smithwilson"; }
88         else if (type == "r_npshannon") {  pattern =  "[filename],r_npshannon"; }
89         else if (type == "r_simpson") {  pattern =  "[filename],r_simpson"; }
90         else if (type == "r_simpsoneven") {  pattern =  "[filename],r_simpsoneven"; }
91         else if (type == "r_invsimpson") {  pattern =  "[filename],r_invsimpson"; }
92         else if (type == "r_bootstrap") {  pattern =  "[filename],r_bootstrap"; }
93         else if (type == "r_coverage") {  pattern =  "[filename],r_coverage"; }
94         else if (type == "r_nseqs") {  pattern =  "[filename],r_nseqs"; }
95         else if (type == "r_heip") {  pattern =  "[filename],r_heip"; }
96         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
97         
98         return pattern;
99     }
100     catch(exception& e) {
101         m->errorOut(e, "RareFactCommand", "getOutputPattern");
102         exit(1);
103     }
104 }
105 //**********************************************************************************************************************
106 RareFactCommand::RareFactCommand(){     
107         try {
108                 abort = true; calledHelp = true; 
109                 setParameters();
110                 vector<string> tempOutNames;
111                 outputTypes["rarefaction"] = tempOutNames;
112                 outputTypes["r_chao"] = tempOutNames;
113                 outputTypes["r_ace"] = tempOutNames;
114                 outputTypes["r_jack"] = tempOutNames;
115                 outputTypes["r_shannon"] = tempOutNames;
116                 outputTypes["r_shannoneven"] = tempOutNames;
117                 outputTypes["r_heip"] = tempOutNames;
118                 outputTypes["r_smithwilson"] = tempOutNames;
119                 outputTypes["r_npshannon"] = tempOutNames;
120                 outputTypes["r_simpson"] = tempOutNames;
121                 outputTypes["r_simpsoneven"] = tempOutNames;
122                 outputTypes["r_invsimpson"] = tempOutNames;
123                 outputTypes["r_bootstrap"] = tempOutNames;
124                 outputTypes["r_coverage"] = tempOutNames;
125                 outputTypes["r_nseqs"] = tempOutNames;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 RareFactCommand::RareFactCommand(string option)  {
134         try {
135                 abort = false; calledHelp = false;   
136                 allLines = 1;
137                                                 
138                 //allow user to run help
139                 if(option == "help") { help(); abort = true; calledHelp = true; }
140                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
141                 
142                 else {
143                         vector<string> myArray = setParameters();
144                         
145                         OptionParser parser(option);
146                         map<string,string> parameters = parser.getParameters();
147                         map<string,string>::iterator it;
148                         
149                         ValidParameters validParameter;
150                 
151                         //check to make sure all parameters are valid for command
152                         for (it = parameters.begin(); it != parameters.end(); it++) { 
153                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
154                         }
155                         
156                         //initialize outputTypes
157                         vector<string> tempOutNames;
158                         outputTypes["rarefaction"] = tempOutNames;
159                         outputTypes["r_chao"] = tempOutNames;
160                         outputTypes["r_ace"] = tempOutNames;
161                         outputTypes["r_jack"] = tempOutNames;
162                         outputTypes["r_shannon"] = tempOutNames;
163                         outputTypes["r_shannoneven"] = tempOutNames;
164                         outputTypes["r_heip"] = tempOutNames;
165                         outputTypes["r_smithwilson"] = tempOutNames;
166                         outputTypes["r_npshannon"] = tempOutNames;
167                         outputTypes["r_simpson"] = tempOutNames;
168                         outputTypes["r_simpsoneven"] = tempOutNames;
169                         outputTypes["r_invsimpson"] = tempOutNames;
170                         outputTypes["r_bootstrap"] = tempOutNames;
171                         outputTypes["r_coverage"] = tempOutNames;
172                         outputTypes["r_nseqs"] = tempOutNames;
173                         
174                         //if the user changes the input directory command factory will send this info to us in the output parameter 
175                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
176                         if (inputDir == "not found"){   inputDir = "";          }
177                         else {
178                                 string path;
179                                 it = parameters.find("shared");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
185                                 }
186                                 
187                                 it = parameters.find("rabund");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
193                                 }
194                                 
195                                 it = parameters.find("sabund");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
201                                 }
202                                 
203                                 it = parameters.find("list");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
209                                 }
210                         }
211                         
212                         //check for required parameters
213                         listfile = validParameter.validFile(parameters, "list", true);
214                         if (listfile == "not open") { listfile = ""; abort = true; }
215                         else if (listfile == "not found") { listfile = ""; }
216                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
217                         
218                         sabundfile = validParameter.validFile(parameters, "sabund", true);
219                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
220                         else if (sabundfile == "not found") { sabundfile = ""; }
221                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
222                         
223                         rabundfile = validParameter.validFile(parameters, "rabund", true);
224                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
225                         else if (rabundfile == "not found") { rabundfile = ""; }
226                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
227                         
228                         sharedfile = validParameter.validFile(parameters, "shared", true);
229                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
230                         else if (sharedfile == "not found") { sharedfile = ""; }
231                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
232                                 
233                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
234                                 //is there are current file available for any of these?
235                                 //give priority to shared, then list, then rabund, then sabund
236                                 //if there is a current shared file, use it
237                                 sharedfile = m->getSharedFile(); 
238                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
239                                 else { 
240                                         listfile = m->getListFile(); 
241                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
242                                         else { 
243                                                 rabundfile = m->getRabundFile(); 
244                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
245                                                 else { 
246                                                         sabundfile = m->getSabundFile(); 
247                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
248                                                         else { 
249                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
250                                                                 abort = true;
251                                                         }
252                                                 }
253                                         }
254                                 }
255                         }
256                         
257                         //if the user changes the output directory command factory will send this info to us in the output parameter 
258                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
259
260                         //check for optional parameter and set defaults
261                         // ...at some point should added some additional type checking...
262                         label = validParameter.validFile(parameters, "label", false);                   
263                         if (label == "not found") { label = ""; }
264                         else { 
265                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
266                                 else { allLines = 1;  }
267                         }
268                                 
269                         calc = validParameter.validFile(parameters, "calc", false);                     
270                         if (calc == "not found") { calc = "sobs";  }
271                         else { 
272                                  if (calc == "default")  {  calc = "sobs";  }
273                         }
274                         m->splitAtDash(calc, Estimators);
275                         if (m->inUsersGroups("citation", Estimators)) { 
276                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
277                                 //remove citation from list of calcs
278                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
279                         }
280
281                         string temp;
282                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
283                         m->mothurConvert(temp, freq); 
284                         
285                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
286                         m->mothurConvert(temp, abund); 
287                         
288                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
289                         m->mothurConvert(temp, nIters); 
290                         
291                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
292                         m->setProcessors(temp);
293                         m->mothurConvert(temp, processors);
294                         
295                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
296                         groupMode = m->isTrue(temp);
297                 }
298                 
299         }
300         catch(exception& e) {
301                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
302                 exit(1);
303         }
304 }
305 //**********************************************************************************************************************
306
307 int RareFactCommand::execute(){
308         try {
309         
310                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
311                 
312         map<string, set<int> > labelToEnds;
313                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
314                 else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
315         
316         if (m->control_pressed) { return 0; }
317                 
318                 map<int, string> file2Group; //index in outputNames[i] -> group
319                 for (int p = 0; p < inputFileNames.size(); p++) {
320                         
321                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
322                                                 
323                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
324                         
325                         if (inputFileNames.size() > 1) {
326                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
327                         }
328                         int i;
329                         ValidCalculators validCalculator;
330                         
331             map<string, string> variables; 
332             variables["[filename]"] = fileNameRoot;
333                           
334                         for (i=0; i<Estimators.size(); i++) {
335                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
336                                         if (Estimators[i] == "sobs") { 
337                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(getOutputFileName("rarefaction",variables))));
338                                                 outputNames.push_back(getOutputFileName("rarefaction",variables)); outputTypes["rarefaction"].push_back(getOutputFileName("rarefaction",variables));
339                                         }else if (Estimators[i] == "chao") { 
340                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(getOutputFileName("r_chao",variables))));
341                                                 outputNames.push_back(getOutputFileName("r_chao",variables)); outputTypes["r_chao"].push_back(getOutputFileName("r_chao",variables));
342                                         }else if (Estimators[i] == "ace") { 
343                                                 if(abund < 5)
344                                                         abund = 10;
345                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(getOutputFileName("r_ace",variables))));
346                                                 outputNames.push_back(getOutputFileName("r_ace",variables)); outputTypes["r_ace"].push_back(getOutputFileName("r_ace",variables));
347                                         }else if (Estimators[i] == "jack") { 
348                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(getOutputFileName("r_jack",variables))));
349                                                 outputNames.push_back(getOutputFileName("r_jack",variables)); outputTypes["r_jack"].push_back(getOutputFileName("r_jack",variables));
350                                         }else if (Estimators[i] == "shannon") { 
351                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(getOutputFileName("r_shannon",variables))));
352                                                 outputNames.push_back(getOutputFileName("r_shannon",variables)); outputTypes["r_shannon"].push_back(getOutputFileName("r_shannon",variables));
353                                         }else if (Estimators[i] == "shannoneven") { 
354                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(getOutputFileName("r_shannoneven",variables))));
355                                                 outputNames.push_back(getOutputFileName("r_shannoneven",variables)); outputTypes["r_shannoneven"].push_back(getOutputFileName("r_shannoneven",variables));
356                                         }else if (Estimators[i] == "heip") { 
357                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(getOutputFileName("r_heip",variables))));
358                                                 outputNames.push_back(getOutputFileName("r_heip",variables)); outputTypes["r_heip"].push_back(getOutputFileName("r_heip",variables));
359                                         }else if (Estimators[i] == "smithwilson") { 
360                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(getOutputFileName("r_smithwilson",variables))));
361                                                 outputNames.push_back(getOutputFileName("r_smithwilson",variables)); outputTypes["r_smithwilson"].push_back(getOutputFileName("r_smithwilson",variables));
362                                         }else if (Estimators[i] == "npshannon") { 
363                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(getOutputFileName("r_npshannon",variables))));
364                                                 outputNames.push_back(getOutputFileName("r_npshannon",variables)); outputTypes["r_npshannon"].push_back(getOutputFileName("r_npshannon",variables));
365                                         }else if (Estimators[i] == "simpson") { 
366                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(getOutputFileName("r_simpson",variables))));
367                                                 outputNames.push_back(getOutputFileName("r_simpson",variables)); outputTypes["r_simpson"].push_back(getOutputFileName("r_simpson",variables));
368                                         }else if (Estimators[i] == "simpsoneven") { 
369                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(getOutputFileName("r_simpsoneven",variables))));
370                                                 outputNames.push_back(getOutputFileName("r_simpsoneven",variables)); outputTypes["r_simpsoneven"].push_back(getOutputFileName("r_simpsoneven",variables));
371                                         }else if (Estimators[i] == "invsimpson") { 
372                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(getOutputFileName("r_invsimpson",variables))));
373                                                 outputNames.push_back(getOutputFileName("r_invsimpson",variables)); outputTypes["r_invsimpson"].push_back(getOutputFileName("r_invsimpson",variables));
374                                         }else if (Estimators[i] == "bootstrap") { 
375                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(getOutputFileName("r_bootstrap",variables))));
376                                                 outputNames.push_back(getOutputFileName("r_bootstrap",variables)); outputTypes["r_bootstrap"].push_back(getOutputFileName("r_bootstrap",variables));
377                                         }else if (Estimators[i] == "coverage") { 
378                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(getOutputFileName("r_coverage",variables))));
379                                                 outputNames.push_back(getOutputFileName("r_coverage",variables)); outputTypes["r_coverage"].push_back(getOutputFileName("r_coverage",variables));
380                                         }else if (Estimators[i] == "nseqs") { 
381                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(getOutputFileName("r_nseqs",variables))));
382                                                 outputNames.push_back(getOutputFileName("r_nseqs",variables)); outputTypes["r_nseqs"].push_back(getOutputFileName("r_nseqs",variables));
383                                         }
384                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
385                                 }
386                         }
387                         
388                         
389                         //if the users entered no valid calculators don't execute command
390                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
391                         
392                         input = new InputData(inputFileNames[p], format);                       
393                         order = input->getOrderVector();
394                         string lastLabel = order->getLabel();
395                         
396                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
397                         set<string> processedLabels;
398                         set<string> userLabels = labels;
399                         
400                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
401                         
402                         //as long as you are not at the end of the file or done wih the lines you want
403                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
404                                 
405                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
406
407                                 
408                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
409                                         
410                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
411                     map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
412                     set<int> ends;
413                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
414                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
415                                         rCurve->getCurve(freq, nIters);
416                                         delete rCurve;
417                                         
418                                         processedLabels.insert(order->getLabel());
419                                         userLabels.erase(order->getLabel());
420                                 }
421                                 
422                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
423                                         string saveLabel = order->getLabel();
424                                         
425                                         delete order;
426                                         order = (input->getOrderVector(lastLabel));
427                                         
428                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
429                                         map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
430                     set<int> ends;
431                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
432                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
433
434                                         rCurve->getCurve(freq, nIters);
435                                         delete rCurve;
436                                         
437                                         processedLabels.insert(order->getLabel());
438                                         userLabels.erase(order->getLabel());
439                                         
440                                         //restore real lastlabel to save below
441                                         order->setLabel(saveLabel);
442                                 }
443                                 
444                                 lastLabel = order->getLabel();          
445                                 
446                                 delete order;
447                                 order = (input->getOrderVector());
448                         }
449                         
450                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
451
452                         //output error messages about any remaining user labels
453                         set<string>::iterator it;
454                         bool needToRun = false;
455                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
456                                 m->mothurOut("Your file does not include the label " + *it);
457                                 if (processedLabels.count(lastLabel) != 1) {
458                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
459                                         needToRun = true;
460                                 }else {
461                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
462                                 }
463                         }
464                         
465                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
466
467                         //run last label if you need to
468                         if (needToRun == true)  {
469                                 if (order != NULL) {    delete order;   }
470                                 order = (input->getOrderVector(lastLabel));
471                                 
472                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
473                                 map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
474                 set<int> ends;
475                 if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
476                 rCurve = new Rarefact(order, rDisplays, processors, ends);
477
478                                 rCurve->getCurve(freq, nIters);
479                                 delete rCurve;
480                                 
481                                 delete order;
482                         }
483                         
484                         
485                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
486                         rDisplays.clear();
487                         delete input;  
488                 }
489                 
490                 
491                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
492
493                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
494                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
495
496                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
497
498                 m->mothurOutEndLine();
499                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
500                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
501                 m->mothurOutEndLine();
502
503                 return 0;
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "RareFactCommand", "execute");
507                 exit(1);
508         }
509 }
510 //**********************************************************************************************************************
511 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
512         try {
513                 
514                 vector<string> newFileNames;
515                 
516                 //find different types of files
517                 map<string, map<string, string> > typesFiles;
518         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
519         vector<string> groupNames;
520                 for (int i = 0; i < outputNames.size(); i++) {
521             
522                         string extension = m->getExtension(outputNames[i]);
523             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
524                         m->mothurRemove(combineFileName); //remove old file
525             
526                         ifstream in;
527                         m->openInputFile(outputNames[i], in);
528                         
529                         string labels = m->getline(in);
530             
531                         istringstream iss (labels,istringstream::in);
532             string newLabel = ""; vector<string> theseLabels;
533             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
534             vector< vector<string> > allLabels;
535             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
536             for (int j = 1; j < theseLabels.size()-1; j++) {
537                 if (theseLabels[j+1] == "lci") {
538                     thisSet.push_back(theseLabels[j]); 
539                     thisSet.push_back(theseLabels[j+1]); 
540                     thisSet.push_back(theseLabels[j+2]);
541                     j++; j++;
542                 }else{ //no lci or hci for this calc.
543                     thisSet.push_back(theseLabels[j]); 
544                 }
545                 allLabels.push_back(thisSet); 
546                 thisSet.clear();
547             }
548             fileLabels[combineFileName] = allLabels;
549                     
550             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
551             if (itfind != typesFiles.end()) {
552                 (itfind->second)[outputNames[i]] = file2Group[i];
553             }else {
554                 map<string, string> temp;  
555                 temp[outputNames[i]] = file2Group[i];
556                 typesFiles[extension] = temp;
557             }
558             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
559                 }
560                 
561                 //for each type create a combo file
562                 
563                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
564                         
565                         ofstream out;
566                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
567                         m->openOutputFileAppend(combineFileName, out);
568                         newFileNames.push_back(combineFileName);
569                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
570             set<int> numSampledSet;
571             
572                         //open each type summary file
573                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
574                         int maxLines = 0;
575                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
576                 
577                 string thisfilename = itFileNameGroup->first;
578                 string group = itFileNameGroup->second;
579                 
580                                 ifstream temp;
581                                 m->openInputFile(thisfilename, temp);
582                                 
583                                 //read through first line - labels
584                                 m->getline(temp);       m->gobble(temp);
585                                 
586                                 map<int, vector< vector<string> > > thisFilesLines;
587                                 while (!temp.eof()){
588                     int numSampled = 0;
589                     temp >> numSampled; m->gobble(temp);
590                 
591                     vector< vector<string> > theseReads;
592                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
593                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
594                         vector<string> reads;
595                         string next = "";
596                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
597                             temp >> next; m->gobble(temp);
598                             reads.push_back(next);
599                         }
600                         theseReads.push_back(reads);
601                     }
602                     thisFilesLines[numSampled] = theseReads;
603                     m->gobble(temp);
604                    
605                     numSampledSet.insert(numSampled);
606                                 }
607                                 
608                                 files[group] = thisFilesLines;
609                                 
610                                 //save longest file for below
611                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
612                                 
613                                 temp.close();
614                                 m->mothurRemove(thisfilename);
615                         }
616                         
617             //output new labels line
618             out << fileLabels[combineFileName][0][0] << '\t';
619             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
620                 for (int n = 0; n < groupNames.size(); n++) { // for each group
621                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
622                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
623                     }
624                 }
625             }
626                         out << endl;
627             
628                         //for each label
629                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
630                                 
631                 out << (*itNumSampled) << '\t';
632                                
633                 if (m->control_pressed) { break; }
634                 
635                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
636                                     //grab data for each group
637                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
638                         
639                         string group = itFileNameGroup->first;
640                        
641                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
642                         if (itLine != files[group].end()) { 
643                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
644                                 out << (itLine->second)[k][l] << '\t';
645                                
646                             }                             
647                         }else { 
648                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
649                                 out << "NA" << '\t';
650                             } 
651                         }
652                     }
653                 }
654                 out << endl;
655                         }       
656                         out.close();
657                 }
658                 
659                 //return combine file name
660                 return newFileNames;
661                 
662         }
663         catch(exception& e) {
664                 m->errorOut(e, "RareFactCommand", "createGroupFile");
665                 exit(1);
666         }
667 }
668 //**********************************************************************************************************************
669 vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
670         try {
671                 vector<string> filenames;
672                 
673                 map<string, ofstream*> filehandles;
674                 map<string, ofstream*>::iterator it3;
675                 
676                 input = new InputData(filename, "sharedfile");
677                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
678                 
679                 string sharedFileRoot = m->getRootName(filename);
680                 
681                 //clears file before we start to write to it below
682                 for (int i=0; i<lookup.size(); i++) {
683                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
684                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
685                 }
686                 
687                 ofstream* temp;
688                 for (int i=0; i<lookup.size(); i++) {
689                         temp = new ofstream;
690                         filehandles[lookup[i]->getGroup()] = temp;
691                         groups.push_back(lookup[i]->getGroup());
692                 }
693
694                 while(lookup[0] != NULL) {
695                 
696                         for (int i = 0; i < lookup.size(); i++) {
697                                 RAbundVector rav = lookup[i]->getRAbundVector();
698                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
699                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
700                                 (*(filehandles[lookup[i]->getGroup()])).close();
701                 label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
702                         }
703                 
704                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
705                         lookup = input->getSharedRAbundVectors();
706                 }
707                 
708                 //free memory
709                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
710                         delete it3->second;
711                 }
712                 
713                 delete input;
714                 m->clearGroups();
715
716                 return filenames;
717         }
718         catch(exception& e) {
719                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
720                 exit(1);
721         }
722 }
723 //**********************************************************************************************************************
724
725
726