]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
fixed bug in sffinfo when ~ was used in the sff filename. fixed issue in shhh.flows...
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27
28 //**********************************************************************************************************************
29 vector<string> RareFactCommand::setParameters(){        
30         try {
31                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
32                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
33                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
34                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
35                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
36                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
37                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
38                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc);
39                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
40                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
41                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
42                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
43                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
44                 
45                 vector<string> myArray;
46                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
47                 return myArray;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "RareFactCommand", "setParameters");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string RareFactCommand::getHelpString(){        
56         try {
57                 ValidCalculators validCalculator;
58                 string helpString = "";
59                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
60                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
61                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
62                 helpString += "The rarefaction.single command should be in the following format: \n";
63                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
64                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
65                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
66                 validCalculator.printCalc("rarefaction");
67                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
68                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
69                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
70                 return helpString;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RareFactCommand", "getHelpString");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79 RareFactCommand::RareFactCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["rarefaction"] = tempOutNames;
85                 outputTypes["r_chao"] = tempOutNames;
86                 outputTypes["r_ace"] = tempOutNames;
87                 outputTypes["r_jack"] = tempOutNames;
88                 outputTypes["r_shannon"] = tempOutNames;
89                 outputTypes["r_shannoneven"] = tempOutNames;
90                 outputTypes["r_heip"] = tempOutNames;
91                 outputTypes["r_smithwilson"] = tempOutNames;
92                 outputTypes["r_npshannon"] = tempOutNames;
93                 outputTypes["r_simpson"] = tempOutNames;
94                 outputTypes["r_simpsoneven"] = tempOutNames;
95                 outputTypes["r_invsimpson"] = tempOutNames;
96                 outputTypes["r_bootstrap"] = tempOutNames;
97                 outputTypes["r_coverage"] = tempOutNames;
98                 outputTypes["r_nseqs"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 RareFactCommand::RareFactCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 allLines = 1;
110                                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         map<string,string>::iterator it;
121                         
122                         ValidParameters validParameter;
123                 
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["rarefaction"] = tempOutNames;
132                         outputTypes["r_chao"] = tempOutNames;
133                         outputTypes["r_ace"] = tempOutNames;
134                         outputTypes["r_jack"] = tempOutNames;
135                         outputTypes["r_shannon"] = tempOutNames;
136                         outputTypes["r_shannoneven"] = tempOutNames;
137                         outputTypes["r_heip"] = tempOutNames;
138                         outputTypes["r_smithwilson"] = tempOutNames;
139                         outputTypes["r_npshannon"] = tempOutNames;
140                         outputTypes["r_simpson"] = tempOutNames;
141                         outputTypes["r_simpsoneven"] = tempOutNames;
142                         outputTypes["r_invsimpson"] = tempOutNames;
143                         outputTypes["r_bootstrap"] = tempOutNames;
144                         outputTypes["r_coverage"] = tempOutNames;
145                         outputTypes["r_nseqs"] = tempOutNames;
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("shared");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("rabund");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("sabund");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("list");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
182                                 }
183                         }
184                         
185                         //check for required parameters
186                         listfile = validParameter.validFile(parameters, "list", true);
187                         if (listfile == "not open") { listfile = ""; abort = true; }
188                         else if (listfile == "not found") { listfile = ""; }
189                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
190                         
191                         sabundfile = validParameter.validFile(parameters, "sabund", true);
192                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
193                         else if (sabundfile == "not found") { sabundfile = ""; }
194                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
195                         
196                         rabundfile = validParameter.validFile(parameters, "rabund", true);
197                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
198                         else if (rabundfile == "not found") { rabundfile = ""; }
199                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
200                         
201                         sharedfile = validParameter.validFile(parameters, "shared", true);
202                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
203                         else if (sharedfile == "not found") { sharedfile = ""; }
204                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
205                                 
206                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
207                                 //is there are current file available for any of these?
208                                 //give priority to shared, then list, then rabund, then sabund
209                                 //if there is a current shared file, use it
210                                 sharedfile = m->getSharedFile(); 
211                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
212                                 else { 
213                                         listfile = m->getListFile(); 
214                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
215                                         else { 
216                                                 rabundfile = m->getRabundFile(); 
217                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
218                                                 else { 
219                                                         sabundfile = m->getSabundFile(); 
220                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
221                                                         else { 
222                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
223                                                                 abort = true;
224                                                         }
225                                                 }
226                                         }
227                                 }
228                         }
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
232
233                         //check for optional parameter and set defaults
234                         // ...at some point should added some additional type checking...
235                         label = validParameter.validFile(parameters, "label", false);                   
236                         if (label == "not found") { label = ""; }
237                         else { 
238                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
239                                 else { allLines = 1;  }
240                         }
241                                 
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "sobs";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "sobs";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         if (m->inUsersGroups("citation", Estimators)) { 
249                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
250                                 //remove citation from list of calcs
251                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
252                         }
253
254                         string temp;
255                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
256                         m->mothurConvert(temp, freq); 
257                         
258                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
259                         m->mothurConvert(temp, abund); 
260                         
261                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
262                         m->mothurConvert(temp, nIters); 
263                         
264                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
265                         m->setProcessors(temp);
266                         m->mothurConvert(temp, processors);
267                         
268                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
269                         groupMode = m->isTrue(temp);
270                 }
271                 
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280 int RareFactCommand::execute(){
281         try {
282         
283                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
284                 
285                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
286                 else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
287                                 
288                 if (m->control_pressed) { return 0; }
289                 
290                 map<int, string> file2Group; //index in outputNames[i] -> group
291                 for (int p = 0; p < inputFileNames.size(); p++) {
292                         
293                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
294                                                 
295                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
296                         
297                         if (inputFileNames.size() > 1) {
298                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
299                         }
300                         int i;
301                         ValidCalculators validCalculator;
302                         
303                           
304                         for (i=0; i<Estimators.size(); i++) {
305                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
306                                         if (Estimators[i] == "sobs") { 
307                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
308                                                 outputNames.push_back(fileNameRoot+"rarefaction"); outputTypes["rarefaction"].push_back(fileNameRoot+"rarefaction");
309                                         }else if (Estimators[i] == "chao") { 
310                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
311                                                 outputNames.push_back(fileNameRoot+"r_chao"); outputTypes["r_chao"].push_back(fileNameRoot+"r_chao");
312                                         }else if (Estimators[i] == "ace") { 
313                                                 if(abund < 5)
314                                                         abund = 10;
315                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
316                                                 outputNames.push_back(fileNameRoot+"r_ace"); outputTypes["r_ace"].push_back(fileNameRoot+"r_ace");
317                                         }else if (Estimators[i] == "jack") { 
318                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
319                                                 outputNames.push_back(fileNameRoot+"r_jack"); outputTypes["r_jack"].push_back(fileNameRoot+"r_jack");
320                                         }else if (Estimators[i] == "shannon") { 
321                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
322                                                 outputNames.push_back(fileNameRoot+"r_shannon"); outputTypes["r_shannon"].push_back(fileNameRoot+"r_shannon");
323                                         }else if (Estimators[i] == "shannoneven") { 
324                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
325                                                 outputNames.push_back(fileNameRoot+"r_shannoneven"); outputTypes["r_shannoneven"].push_back(fileNameRoot+"r_shannoneven");
326                                         }else if (Estimators[i] == "heip") { 
327                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
328                                                 outputNames.push_back(fileNameRoot+"r_heip"); outputTypes["r_heip"].push_back(fileNameRoot+"r_heip");
329                                         }else if (Estimators[i] == "smithwilson") { 
330                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
331                                                 outputNames.push_back(fileNameRoot+"r_smithwilson"); outputTypes["r_smithwilson"].push_back(fileNameRoot+"r_smithwilson");
332                                         }else if (Estimators[i] == "npshannon") { 
333                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
334                                                 outputNames.push_back(fileNameRoot+"r_npshannon"); outputTypes["r_npshannon"].push_back(fileNameRoot+"r_npshannon");
335                                         }else if (Estimators[i] == "simpson") { 
336                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
337                                                 outputNames.push_back(fileNameRoot+"r_simpson"); outputTypes["r_simpson"].push_back(fileNameRoot+"r_simpson");
338                                         }else if (Estimators[i] == "simpsoneven") { 
339                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
340                                                 outputNames.push_back(fileNameRoot+"r_simpsoneven"); outputTypes["r_simpsoneven"].push_back(fileNameRoot+"r_simpsoneven");
341                                         }else if (Estimators[i] == "invsimpson") { 
342                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
343                                                 outputNames.push_back(fileNameRoot+"r_invsimpson"); outputTypes["r_invsimpson"].push_back(fileNameRoot+"r_invsimpson");
344                                         }else if (Estimators[i] == "bootstrap") { 
345                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
346                                                 outputNames.push_back(fileNameRoot+"r_bootstrap"); outputTypes["r_bootstrap"].push_back(fileNameRoot+"r_bootstrap");
347                                         }else if (Estimators[i] == "coverage") { 
348                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
349                                                 outputNames.push_back(fileNameRoot+"r_coverage"); outputTypes["r_coverage"].push_back(fileNameRoot+"r_coverage");
350                                         }else if (Estimators[i] == "nseqs") { 
351                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
352                                                 outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
353                                         }
354                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
355                                 }
356                         }
357                         
358                         
359                         //if the users entered no valid calculators don't execute command
360                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
361                         
362                         input = new InputData(inputFileNames[p], format);                       
363                         order = input->getOrderVector();
364                         string lastLabel = order->getLabel();
365                         
366                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
367                         set<string> processedLabels;
368                         set<string> userLabels = labels;
369                         
370                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
371                         
372                         //as long as you are not at the end of the file or done wih the lines you want
373                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
374                                 
375                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
376
377                                 
378                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
379                                         
380                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
381                                         rCurve = new Rarefact(order, rDisplays, processors);
382                                         rCurve->getCurve(freq, nIters);
383                                         delete rCurve;
384                                         
385                                         processedLabels.insert(order->getLabel());
386                                         userLabels.erase(order->getLabel());
387                                 }
388                                 
389                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
390                                         string saveLabel = order->getLabel();
391                                         
392                                         delete order;
393                                         order = (input->getOrderVector(lastLabel));
394                                         
395                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
396                                         rCurve = new Rarefact(order, rDisplays, processors);
397                                         rCurve->getCurve(freq, nIters);
398                                         delete rCurve;
399                                         
400                                         processedLabels.insert(order->getLabel());
401                                         userLabels.erase(order->getLabel());
402                                         
403                                         //restore real lastlabel to save below
404                                         order->setLabel(saveLabel);
405                                 }
406                                 
407                                 lastLabel = order->getLabel();          
408                                 
409                                 delete order;
410                                 order = (input->getOrderVector());
411                         }
412                         
413                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
414
415                         //output error messages about any remaining user labels
416                         set<string>::iterator it;
417                         bool needToRun = false;
418                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
419                                 m->mothurOut("Your file does not include the label " + *it);
420                                 if (processedLabels.count(lastLabel) != 1) {
421                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
422                                         needToRun = true;
423                                 }else {
424                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
425                                 }
426                         }
427                         
428                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
429
430                         //run last label if you need to
431                         if (needToRun == true)  {
432                                 if (order != NULL) {    delete order;   }
433                                 order = (input->getOrderVector(lastLabel));
434                                 
435                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
436                                 rCurve = new Rarefact(order, rDisplays, processors);
437                                 rCurve->getCurve(freq, nIters);
438                                 delete rCurve;
439                                 
440                                 delete order;
441                         }
442                         
443                         
444                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
445                         rDisplays.clear();
446                         delete input;  
447                 }
448                 
449                 
450                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
451
452                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
453                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
454
455                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
456
457                 m->mothurOutEndLine();
458                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
459                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
460                 m->mothurOutEndLine();
461
462                 return 0;
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "RareFactCommand", "execute");
466                 exit(1);
467         }
468 }
469 //**********************************************************************************************************************
470 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
471         try {
472                 
473                 vector<string> newFileNames;
474                 
475                 //find different types of files
476                 map<string, map<string, string> > typesFiles;
477         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
478         vector<string> groupNames;
479                 for (int i = 0; i < outputNames.size(); i++) {
480             
481                         string extension = m->getExtension(outputNames[i]);
482             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
483                         m->mothurRemove(combineFileName); //remove old file
484             
485                         ifstream in;
486                         m->openInputFile(outputNames[i], in);
487                         
488                         string labels = m->getline(in);
489             
490                         istringstream iss (labels,istringstream::in);
491             string newLabel = ""; vector<string> theseLabels;
492             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
493             vector< vector<string> > allLabels;
494             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
495             for (int j = 1; j < theseLabels.size()-1; j++) {
496                 if (theseLabels[j+1] == "lci") {
497                     thisSet.push_back(theseLabels[j]); 
498                     thisSet.push_back(theseLabels[j+1]); 
499                     thisSet.push_back(theseLabels[j+2]);
500                     j++; j++;
501                 }else{ //no lci or hci for this calc.
502                     thisSet.push_back(theseLabels[j]); 
503                 }
504                 allLabels.push_back(thisSet); 
505                 thisSet.clear();
506             }
507             fileLabels[combineFileName] = allLabels;
508                     
509             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
510             if (itfind != typesFiles.end()) {
511                 (itfind->second)[outputNames[i]] = file2Group[i];
512             }else {
513                 map<string, string> temp;  
514                 temp[outputNames[i]] = file2Group[i];
515                 typesFiles[extension] = temp;
516             }
517             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
518                 }
519                 
520                 //for each type create a combo file
521                 
522                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
523                         
524                         ofstream out;
525                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
526                         m->openOutputFileAppend(combineFileName, out);
527                         newFileNames.push_back(combineFileName);
528                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
529             set<int> numSampledSet;
530             
531                         //open each type summary file
532                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
533                         int maxLines = 0;
534                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
535                 
536                 string thisfilename = itFileNameGroup->first;
537                 string group = itFileNameGroup->second;
538                 
539                                 ifstream temp;
540                                 m->openInputFile(thisfilename, temp);
541                                 
542                                 //read through first line - labels
543                                 m->getline(temp);       m->gobble(temp);
544                                 
545                                 map<int, vector< vector<string> > > thisFilesLines;
546                                 while (!temp.eof()){
547                     int numSampled = 0;
548                     temp >> numSampled; m->gobble(temp);
549                 
550                     vector< vector<string> > theseReads;
551                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
552                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
553                         vector<string> reads;
554                         string next = "";
555                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
556                             temp >> next; m->gobble(temp);
557                             reads.push_back(next);
558                         }
559                         theseReads.push_back(reads);
560                     }
561                     thisFilesLines[numSampled] = theseReads;
562                     m->gobble(temp);
563                    
564                     numSampledSet.insert(numSampled);
565                                 }
566                                 
567                                 files[group] = thisFilesLines;
568                                 
569                                 //save longest file for below
570                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
571                                 
572                                 temp.close();
573                                 m->mothurRemove(thisfilename);
574                         }
575                         
576             //output new labels line
577             out << fileLabels[combineFileName][0][0] << '\t';
578             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
579                 for (int n = 0; n < groupNames.size(); n++) { // for each group
580                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
581                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
582                     }
583                 }
584             }
585                         out << endl;
586             
587                         //for each label
588                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
589                                 
590                 out << (*itNumSampled) << '\t';
591                                
592                 if (m->control_pressed) { break; }
593                 
594                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
595                                     //grab data for each group
596                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
597                         
598                         string group = itFileNameGroup->first;
599                        
600                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
601                         if (itLine != files[group].end()) { 
602                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
603                                 out << (itLine->second)[k][l] << '\t';
604                                
605                             }                             
606                         }else { 
607                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
608                                 out << "NA" << '\t';
609                             } 
610                         }
611                     }
612                 }
613                 out << endl;
614                         }       
615                         out.close();
616                 }
617                 
618                 //return combine file name
619                 return newFileNames;
620                 
621         }
622         catch(exception& e) {
623                 m->errorOut(e, "RareFactCommand", "createGroupFile");
624                 exit(1);
625         }
626 }
627 //**********************************************************************************************************************
628 vector<string> RareFactCommand::parseSharedFile(string filename) {
629         try {
630                 vector<string> filenames;
631                 
632                 map<string, ofstream*> filehandles;
633                 map<string, ofstream*>::iterator it3;
634                 
635                 input = new InputData(filename, "sharedfile");
636                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
637                 
638                 string sharedFileRoot = m->getRootName(filename);
639                 
640                 //clears file before we start to write to it below
641                 for (int i=0; i<lookup.size(); i++) {
642                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
643                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
644                 }
645                 
646                 ofstream* temp;
647                 for (int i=0; i<lookup.size(); i++) {
648                         temp = new ofstream;
649                         filehandles[lookup[i]->getGroup()] = temp;
650                         groups.push_back(lookup[i]->getGroup());
651                 }
652
653                 while(lookup[0] != NULL) {
654                 
655                         for (int i = 0; i < lookup.size(); i++) {
656                                 RAbundVector rav = lookup[i]->getRAbundVector();
657                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
658                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
659                                 (*(filehandles[lookup[i]->getGroup()])).close();
660                         }
661                 
662                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
663                         lookup = input->getSharedRAbundVectors();
664                 }
665                 
666                 //free memory
667                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
668                         delete it3->second;
669                 }
670                 
671                 delete input;
672                 m->clearGroups();
673
674                 return filenames;
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
678                 exit(1);
679         }
680 }
681 //**********************************************************************************************************************
682
683
684