]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
forced rarefaction.single to output ending line for all groups. added subsample...
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27
28 //**********************************************************************************************************************
29 vector<string> RareFactCommand::setParameters(){        
30         try {
31                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
32                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
33                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
34                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
35                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
36                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
37                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
38                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc);
39                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
40                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
41                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
42                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
43                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
44                 
45                 vector<string> myArray;
46                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
47                 return myArray;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "RareFactCommand", "setParameters");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string RareFactCommand::getHelpString(){        
56         try {
57                 ValidCalculators validCalculator;
58                 string helpString = "";
59                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
60                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
61                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
62                 helpString += "The rarefaction.single command should be in the following format: \n";
63                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
64                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
65                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
66                 validCalculator.printCalc("rarefaction");
67                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
68                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
69                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
70                 return helpString;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RareFactCommand", "getHelpString");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79 RareFactCommand::RareFactCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["rarefaction"] = tempOutNames;
85                 outputTypes["r_chao"] = tempOutNames;
86                 outputTypes["r_ace"] = tempOutNames;
87                 outputTypes["r_jack"] = tempOutNames;
88                 outputTypes["r_shannon"] = tempOutNames;
89                 outputTypes["r_shannoneven"] = tempOutNames;
90                 outputTypes["r_heip"] = tempOutNames;
91                 outputTypes["r_smithwilson"] = tempOutNames;
92                 outputTypes["r_npshannon"] = tempOutNames;
93                 outputTypes["r_simpson"] = tempOutNames;
94                 outputTypes["r_simpsoneven"] = tempOutNames;
95                 outputTypes["r_invsimpson"] = tempOutNames;
96                 outputTypes["r_bootstrap"] = tempOutNames;
97                 outputTypes["r_coverage"] = tempOutNames;
98                 outputTypes["r_nseqs"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 RareFactCommand::RareFactCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 allLines = 1;
110                                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         map<string,string>::iterator it;
121                         
122                         ValidParameters validParameter;
123                 
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["rarefaction"] = tempOutNames;
132                         outputTypes["r_chao"] = tempOutNames;
133                         outputTypes["r_ace"] = tempOutNames;
134                         outputTypes["r_jack"] = tempOutNames;
135                         outputTypes["r_shannon"] = tempOutNames;
136                         outputTypes["r_shannoneven"] = tempOutNames;
137                         outputTypes["r_heip"] = tempOutNames;
138                         outputTypes["r_smithwilson"] = tempOutNames;
139                         outputTypes["r_npshannon"] = tempOutNames;
140                         outputTypes["r_simpson"] = tempOutNames;
141                         outputTypes["r_simpsoneven"] = tempOutNames;
142                         outputTypes["r_invsimpson"] = tempOutNames;
143                         outputTypes["r_bootstrap"] = tempOutNames;
144                         outputTypes["r_coverage"] = tempOutNames;
145                         outputTypes["r_nseqs"] = tempOutNames;
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("shared");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("rabund");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("sabund");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("list");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
182                                 }
183                         }
184                         
185                         //check for required parameters
186                         listfile = validParameter.validFile(parameters, "list", true);
187                         if (listfile == "not open") { listfile = ""; abort = true; }
188                         else if (listfile == "not found") { listfile = ""; }
189                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
190                         
191                         sabundfile = validParameter.validFile(parameters, "sabund", true);
192                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
193                         else if (sabundfile == "not found") { sabundfile = ""; }
194                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
195                         
196                         rabundfile = validParameter.validFile(parameters, "rabund", true);
197                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
198                         else if (rabundfile == "not found") { rabundfile = ""; }
199                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
200                         
201                         sharedfile = validParameter.validFile(parameters, "shared", true);
202                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
203                         else if (sharedfile == "not found") { sharedfile = ""; }
204                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
205                                 
206                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
207                                 //is there are current file available for any of these?
208                                 //give priority to shared, then list, then rabund, then sabund
209                                 //if there is a current shared file, use it
210                                 sharedfile = m->getSharedFile(); 
211                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
212                                 else { 
213                                         listfile = m->getListFile(); 
214                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
215                                         else { 
216                                                 rabundfile = m->getRabundFile(); 
217                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
218                                                 else { 
219                                                         sabundfile = m->getSabundFile(); 
220                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
221                                                         else { 
222                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
223                                                                 abort = true;
224                                                         }
225                                                 }
226                                         }
227                                 }
228                         }
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
232
233                         //check for optional parameter and set defaults
234                         // ...at some point should added some additional type checking...
235                         label = validParameter.validFile(parameters, "label", false);                   
236                         if (label == "not found") { label = ""; }
237                         else { 
238                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
239                                 else { allLines = 1;  }
240                         }
241                                 
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "sobs";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "sobs";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         if (m->inUsersGroups("citation", Estimators)) { 
249                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
250                                 //remove citation from list of calcs
251                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
252                         }
253
254                         string temp;
255                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
256                         m->mothurConvert(temp, freq); 
257                         
258                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
259                         m->mothurConvert(temp, abund); 
260                         
261                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
262                         m->mothurConvert(temp, nIters); 
263                         
264                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
265                         m->setProcessors(temp);
266                         m->mothurConvert(temp, processors);
267                         
268                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
269                         groupMode = m->isTrue(temp);
270                 }
271                 
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280 int RareFactCommand::execute(){
281         try {
282         
283                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
284                 
285         map<string, set<int> > labelToEnds;
286                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
287                 else {  inputFileNames = parseSharedFile(sharedfile, labelToEnds);  format = "rabund"; }
288                 for (map<string, set<int> >::iterator it = labelToEnds.begin(); it != labelToEnds.end(); it++) {
289             cout << it->first << endl;
290             for (set<int>::iterator its = (it->second).begin(); its != (it->second).end(); its++) {
291                 cout << (*its) << endl;
292             }
293         }
294                 if (m->control_pressed) { return 0; }
295                 
296                 map<int, string> file2Group; //index in outputNames[i] -> group
297                 for (int p = 0; p < inputFileNames.size(); p++) {
298                         
299                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
300                                                 
301                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
302                         
303                         if (inputFileNames.size() > 1) {
304                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
305                         }
306                         int i;
307                         ValidCalculators validCalculator;
308                         
309                           
310                         for (i=0; i<Estimators.size(); i++) {
311                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
312                                         if (Estimators[i] == "sobs") { 
313                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
314                                                 outputNames.push_back(fileNameRoot+"rarefaction"); outputTypes["rarefaction"].push_back(fileNameRoot+"rarefaction");
315                                         }else if (Estimators[i] == "chao") { 
316                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
317                                                 outputNames.push_back(fileNameRoot+"r_chao"); outputTypes["r_chao"].push_back(fileNameRoot+"r_chao");
318                                         }else if (Estimators[i] == "ace") { 
319                                                 if(abund < 5)
320                                                         abund = 10;
321                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
322                                                 outputNames.push_back(fileNameRoot+"r_ace"); outputTypes["r_ace"].push_back(fileNameRoot+"r_ace");
323                                         }else if (Estimators[i] == "jack") { 
324                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
325                                                 outputNames.push_back(fileNameRoot+"r_jack"); outputTypes["r_jack"].push_back(fileNameRoot+"r_jack");
326                                         }else if (Estimators[i] == "shannon") { 
327                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
328                                                 outputNames.push_back(fileNameRoot+"r_shannon"); outputTypes["r_shannon"].push_back(fileNameRoot+"r_shannon");
329                                         }else if (Estimators[i] == "shannoneven") { 
330                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
331                                                 outputNames.push_back(fileNameRoot+"r_shannoneven"); outputTypes["r_shannoneven"].push_back(fileNameRoot+"r_shannoneven");
332                                         }else if (Estimators[i] == "heip") { 
333                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
334                                                 outputNames.push_back(fileNameRoot+"r_heip"); outputTypes["r_heip"].push_back(fileNameRoot+"r_heip");
335                                         }else if (Estimators[i] == "smithwilson") { 
336                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
337                                                 outputNames.push_back(fileNameRoot+"r_smithwilson"); outputTypes["r_smithwilson"].push_back(fileNameRoot+"r_smithwilson");
338                                         }else if (Estimators[i] == "npshannon") { 
339                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
340                                                 outputNames.push_back(fileNameRoot+"r_npshannon"); outputTypes["r_npshannon"].push_back(fileNameRoot+"r_npshannon");
341                                         }else if (Estimators[i] == "simpson") { 
342                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
343                                                 outputNames.push_back(fileNameRoot+"r_simpson"); outputTypes["r_simpson"].push_back(fileNameRoot+"r_simpson");
344                                         }else if (Estimators[i] == "simpsoneven") { 
345                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
346                                                 outputNames.push_back(fileNameRoot+"r_simpsoneven"); outputTypes["r_simpsoneven"].push_back(fileNameRoot+"r_simpsoneven");
347                                         }else if (Estimators[i] == "invsimpson") { 
348                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
349                                                 outputNames.push_back(fileNameRoot+"r_invsimpson"); outputTypes["r_invsimpson"].push_back(fileNameRoot+"r_invsimpson");
350                                         }else if (Estimators[i] == "bootstrap") { 
351                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
352                                                 outputNames.push_back(fileNameRoot+"r_bootstrap"); outputTypes["r_bootstrap"].push_back(fileNameRoot+"r_bootstrap");
353                                         }else if (Estimators[i] == "coverage") { 
354                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
355                                                 outputNames.push_back(fileNameRoot+"r_coverage"); outputTypes["r_coverage"].push_back(fileNameRoot+"r_coverage");
356                                         }else if (Estimators[i] == "nseqs") { 
357                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
358                                                 outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
359                                         }
360                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
361                                 }
362                         }
363                         
364                         
365                         //if the users entered no valid calculators don't execute command
366                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
367                         
368                         input = new InputData(inputFileNames[p], format);                       
369                         order = input->getOrderVector();
370                         string lastLabel = order->getLabel();
371                         
372                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
373                         set<string> processedLabels;
374                         set<string> userLabels = labels;
375                         
376                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
377                         
378                         //as long as you are not at the end of the file or done wih the lines you want
379                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
380                                 
381                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
382
383                                 
384                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
385                                         
386                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
387                     map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
388                     set<int> ends;
389                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
390                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
391                                         rCurve->getCurve(freq, nIters);
392                                         delete rCurve;
393                                         
394                                         processedLabels.insert(order->getLabel());
395                                         userLabels.erase(order->getLabel());
396                                 }
397                                 
398                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
399                                         string saveLabel = order->getLabel();
400                                         
401                                         delete order;
402                                         order = (input->getOrderVector(lastLabel));
403                                         
404                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
405                                         map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
406                     set<int> ends;
407                     if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
408                                         rCurve = new Rarefact(order, rDisplays, processors, ends);
409
410                                         rCurve->getCurve(freq, nIters);
411                                         delete rCurve;
412                                         
413                                         processedLabels.insert(order->getLabel());
414                                         userLabels.erase(order->getLabel());
415                                         
416                                         //restore real lastlabel to save below
417                                         order->setLabel(saveLabel);
418                                 }
419                                 
420                                 lastLabel = order->getLabel();          
421                                 
422                                 delete order;
423                                 order = (input->getOrderVector());
424                         }
425                         
426                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
427
428                         //output error messages about any remaining user labels
429                         set<string>::iterator it;
430                         bool needToRun = false;
431                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
432                                 m->mothurOut("Your file does not include the label " + *it);
433                                 if (processedLabels.count(lastLabel) != 1) {
434                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
435                                         needToRun = true;
436                                 }else {
437                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
438                                 }
439                         }
440                         
441                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
442
443                         //run last label if you need to
444                         if (needToRun == true)  {
445                                 if (order != NULL) {    delete order;   }
446                                 order = (input->getOrderVector(lastLabel));
447                                 
448                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
449                                 map<string, set<int> >::iterator itEndings = labelToEnds.find(order->getLabel());
450                 set<int> ends;
451                 if (itEndings != labelToEnds.end()) { ends = itEndings->second; }
452                 rCurve = new Rarefact(order, rDisplays, processors, ends);
453
454                                 rCurve->getCurve(freq, nIters);
455                                 delete rCurve;
456                                 
457                                 delete order;
458                         }
459                         
460                         
461                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
462                         rDisplays.clear();
463                         delete input;  
464                 }
465                 
466                 
467                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
468
469                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
470                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
471
472                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
473
474                 m->mothurOutEndLine();
475                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
476                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
477                 m->mothurOutEndLine();
478
479                 return 0;
480         }
481         catch(exception& e) {
482                 m->errorOut(e, "RareFactCommand", "execute");
483                 exit(1);
484         }
485 }
486 //**********************************************************************************************************************
487 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
488         try {
489                 
490                 vector<string> newFileNames;
491                 
492                 //find different types of files
493                 map<string, map<string, string> > typesFiles;
494         map<string, vector< vector<string> > > fileLabels; //combofile name to labels. each label is a vector because it may be unique lci hci.
495         vector<string> groupNames;
496                 for (int i = 0; i < outputNames.size(); i++) {
497             
498                         string extension = m->getExtension(outputNames[i]);
499             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
500                         m->mothurRemove(combineFileName); //remove old file
501             
502                         ifstream in;
503                         m->openInputFile(outputNames[i], in);
504                         
505                         string labels = m->getline(in);
506             
507                         istringstream iss (labels,istringstream::in);
508             string newLabel = ""; vector<string> theseLabels;
509             while(!iss.eof()) {  iss >> newLabel; m->gobble(iss); theseLabels.push_back(newLabel); }
510             vector< vector<string> > allLabels;
511             vector<string> thisSet; thisSet.push_back(theseLabels[0]); allLabels.push_back(thisSet); thisSet.clear(); //makes "numSampled" its own grouping
512             for (int j = 1; j < theseLabels.size()-1; j++) {
513                 if (theseLabels[j+1] == "lci") {
514                     thisSet.push_back(theseLabels[j]); 
515                     thisSet.push_back(theseLabels[j+1]); 
516                     thisSet.push_back(theseLabels[j+2]);
517                     j++; j++;
518                 }else{ //no lci or hci for this calc.
519                     thisSet.push_back(theseLabels[j]); 
520                 }
521                 allLabels.push_back(thisSet); 
522                 thisSet.clear();
523             }
524             fileLabels[combineFileName] = allLabels;
525                     
526             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
527             if (itfind != typesFiles.end()) {
528                 (itfind->second)[outputNames[i]] = file2Group[i];
529             }else {
530                 map<string, string> temp;  
531                 temp[outputNames[i]] = file2Group[i];
532                 typesFiles[extension] = temp;
533             }
534             if (!(m->inUsersGroups(file2Group[i], groupNames))) {  groupNames.push_back(file2Group[i]); }
535                 }
536                 
537                 //for each type create a combo file
538                 
539                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
540                         
541                         ofstream out;
542                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
543                         m->openOutputFileAppend(combineFileName, out);
544                         newFileNames.push_back(combineFileName);
545                         map<string, string> thisTypesFiles = it->second; //it->second maps filename to group
546             set<int> numSampledSet;
547             
548                         //open each type summary file
549                         map<string, map<int, vector< vector<string> > > > files; //maps file name to lines in file
550                         int maxLines = 0;
551                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
552                 
553                 string thisfilename = itFileNameGroup->first;
554                 string group = itFileNameGroup->second;
555                 
556                                 ifstream temp;
557                                 m->openInputFile(thisfilename, temp);
558                                 
559                                 //read through first line - labels
560                                 m->getline(temp);       m->gobble(temp);
561                                 
562                                 map<int, vector< vector<string> > > thisFilesLines;
563                                 while (!temp.eof()){
564                     int numSampled = 0;
565                     temp >> numSampled; m->gobble(temp);
566                 
567                     vector< vector<string> > theseReads;
568                     vector<string> thisSet; thisSet.push_back(toString(numSampled)); theseReads.push_back(thisSet); thisSet.clear();
569                     for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
570                         vector<string> reads;
571                         string next = "";
572                         for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
573                             temp >> next; m->gobble(temp);
574                             reads.push_back(next);
575                         }
576                         theseReads.push_back(reads);
577                     }
578                     thisFilesLines[numSampled] = theseReads;
579                     m->gobble(temp);
580                    
581                     numSampledSet.insert(numSampled);
582                                 }
583                                 
584                                 files[group] = thisFilesLines;
585                                 
586                                 //save longest file for below
587                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
588                                 
589                                 temp.close();
590                                 m->mothurRemove(thisfilename);
591                         }
592                         
593             //output new labels line
594             out << fileLabels[combineFileName][0][0] << '\t';
595             for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //output thing like 0.03-A lci-A hci-A
596                 for (int n = 0; n < groupNames.size(); n++) { // for each group
597                     for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { //output modified labels
598                         out << fileLabels[combineFileName][k][l] << '-' << groupNames[n] << '\t';
599                     }
600                 }
601             }
602                         out << endl;
603             
604                         //for each label
605                         for (set<int>::iterator itNumSampled = numSampledSet.begin(); itNumSampled != numSampledSet.end(); itNumSampled++) {
606                                 
607                 out << (*itNumSampled) << '\t';
608                                
609                 if (m->control_pressed) { break; }
610                 
611                 for (int k = 1; k < fileLabels[combineFileName].size(); k++) { //each chunk
612                                     //grab data for each group
613                     for (map<string, map<int, vector< vector<string> > > >::iterator itFileNameGroup = files.begin(); itFileNameGroup != files.end(); itFileNameGroup++) {
614                         
615                         string group = itFileNameGroup->first;
616                        
617                         map<int, vector< vector<string> > >::iterator itLine = files[group].find(*itNumSampled);
618                         if (itLine != files[group].end()) { 
619                             for (int l = 0; l < (itLine->second)[k].size(); l++) { 
620                                 out << (itLine->second)[k][l] << '\t';
621                                
622                             }                             
623                         }else { 
624                             for (int l = 0; l < fileLabels[combineFileName][k].size(); l++) { 
625                                 out << "NA" << '\t';
626                             } 
627                         }
628                     }
629                 }
630                 out << endl;
631                         }       
632                         out.close();
633                 }
634                 
635                 //return combine file name
636                 return newFileNames;
637                 
638         }
639         catch(exception& e) {
640                 m->errorOut(e, "RareFactCommand", "createGroupFile");
641                 exit(1);
642         }
643 }
644 //**********************************************************************************************************************
645 vector<string> RareFactCommand::parseSharedFile(string filename, map<string, set<int> >& label2Ends) {
646         try {
647                 vector<string> filenames;
648                 
649                 map<string, ofstream*> filehandles;
650                 map<string, ofstream*>::iterator it3;
651                 
652                 input = new InputData(filename, "sharedfile");
653                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
654                 
655                 string sharedFileRoot = m->getRootName(filename);
656                 
657                 //clears file before we start to write to it below
658                 for (int i=0; i<lookup.size(); i++) {
659                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
660                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
661                 }
662                 
663                 ofstream* temp;
664                 for (int i=0; i<lookup.size(); i++) {
665                         temp = new ofstream;
666                         filehandles[lookup[i]->getGroup()] = temp;
667                         groups.push_back(lookup[i]->getGroup());
668                 }
669
670                 while(lookup[0] != NULL) {
671                 
672                         for (int i = 0; i < lookup.size(); i++) {
673                                 RAbundVector rav = lookup[i]->getRAbundVector();
674                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
675                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
676                                 (*(filehandles[lookup[i]->getGroup()])).close();
677                 label2Ends[lookup[i]->getLabel()].insert(rav.getNumSeqs());
678                         }
679                 
680                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
681                         lookup = input->getSharedRAbundVectors();
682                 }
683                 
684                 //free memory
685                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
686                         delete it3->second;
687                 }
688                 
689                 delete input;
690                 m->clearGroups();
691
692                 return filenames;
693         }
694         catch(exception& e) {
695                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
696                 exit(1);
697         }
698 }
699 //**********************************************************************************************************************
700
701
702