]> git.donarmstrong.com Git - mothur.git/blob - rarefactcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / rarefactcommand.cpp
1 /*
2  *  rarefactcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactcommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "heip.h"
19 #include "smithwilson.h"
20 #include "invsimpson.h"
21 #include "npshannon.h"
22 #include "shannoneven.h"
23 #include "shannon.h"
24 #include "jackknife.h"
25 #include "coverage.h"
26
27
28 //**********************************************************************************************************************
29 vector<string> RareFactCommand::setParameters(){        
30         try {
31                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
32                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
33                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
34                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
35                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
36                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
37                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
38                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap", "sobs", "", "", "",true,false); parameters.push_back(pcalc);
39                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
40                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
41                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
42                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
43                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
44                 
45                 vector<string> myArray;
46                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
47                 return myArray;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "RareFactCommand", "setParameters");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string RareFactCommand::getHelpString(){        
56         try {
57                 ValidCalculators validCalculator;
58                 string helpString = "";
59                 helpString += "The rarefaction.single command parameters are list, sabund, rabund, shared, label, iters, freq, calc, processors and abund.  list, sabund, rabund or shared is required unless you have a valid current file. \n";
60                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
61                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
62                 helpString += "The rarefaction.single command should be in the following format: \n";
63                 helpString += "rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n";
64                 helpString += "Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n";
65                 helpString += "The default values for iters is 1000, freq is 100, and calc is rarefaction which calculates the rarefaction curve for the observed richness.\n";
66                 validCalculator.printCalc("rarefaction");
67                 helpString += "If you are running rarefaction.single with a shared file and would like your results collated in one file, set groupmode=t. (Default=true).\n";
68                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
69                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
70                 return helpString;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RareFactCommand", "getHelpString");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79 RareFactCommand::RareFactCommand(){     
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["rarefaction"] = tempOutNames;
85                 outputTypes["r_chao"] = tempOutNames;
86                 outputTypes["r_ace"] = tempOutNames;
87                 outputTypes["r_jack"] = tempOutNames;
88                 outputTypes["r_shannon"] = tempOutNames;
89                 outputTypes["r_shannoneven"] = tempOutNames;
90                 outputTypes["r_heip"] = tempOutNames;
91                 outputTypes["r_smithwilson"] = tempOutNames;
92                 outputTypes["r_npshannon"] = tempOutNames;
93                 outputTypes["r_simpson"] = tempOutNames;
94                 outputTypes["r_simpsoneven"] = tempOutNames;
95                 outputTypes["r_invsimpson"] = tempOutNames;
96                 outputTypes["r_bootstrap"] = tempOutNames;
97                 outputTypes["r_coverage"] = tempOutNames;
98                 outputTypes["r_nseqs"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 RareFactCommand::RareFactCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 allLines = 1;
110                                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         map<string,string>::iterator it;
121                         
122                         ValidParameters validParameter;
123                 
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["rarefaction"] = tempOutNames;
132                         outputTypes["r_chao"] = tempOutNames;
133                         outputTypes["r_ace"] = tempOutNames;
134                         outputTypes["r_jack"] = tempOutNames;
135                         outputTypes["r_shannon"] = tempOutNames;
136                         outputTypes["r_shannoneven"] = tempOutNames;
137                         outputTypes["r_heip"] = tempOutNames;
138                         outputTypes["r_smithwilson"] = tempOutNames;
139                         outputTypes["r_npshannon"] = tempOutNames;
140                         outputTypes["r_simpson"] = tempOutNames;
141                         outputTypes["r_simpsoneven"] = tempOutNames;
142                         outputTypes["r_invsimpson"] = tempOutNames;
143                         outputTypes["r_bootstrap"] = tempOutNames;
144                         outputTypes["r_coverage"] = tempOutNames;
145                         outputTypes["r_nseqs"] = tempOutNames;
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("shared");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("rabund");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("sabund");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("list");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
182                                 }
183                         }
184                         
185                         //check for required parameters
186                         listfile = validParameter.validFile(parameters, "list", true);
187                         if (listfile == "not open") { listfile = ""; abort = true; }
188                         else if (listfile == "not found") { listfile = ""; }
189                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
190                         
191                         sabundfile = validParameter.validFile(parameters, "sabund", true);
192                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
193                         else if (sabundfile == "not found") { sabundfile = ""; }
194                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
195                         
196                         rabundfile = validParameter.validFile(parameters, "rabund", true);
197                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
198                         else if (rabundfile == "not found") { rabundfile = ""; }
199                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
200                         
201                         sharedfile = validParameter.validFile(parameters, "shared", true);
202                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
203                         else if (sharedfile == "not found") { sharedfile = ""; }
204                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
205                                 
206                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
207                                 //is there are current file available for any of these?
208                                 //give priority to shared, then list, then rabund, then sabund
209                                 //if there is a current shared file, use it
210                                 sharedfile = m->getSharedFile(); 
211                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
212                                 else { 
213                                         listfile = m->getListFile(); 
214                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
215                                         else { 
216                                                 rabundfile = m->getRabundFile(); 
217                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
218                                                 else { 
219                                                         sabundfile = m->getSabundFile(); 
220                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
221                                                         else { 
222                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
223                                                                 abort = true;
224                                                         }
225                                                 }
226                                         }
227                                 }
228                         }
229                         
230                         //if the user changes the output directory command factory will send this info to us in the output parameter 
231                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
232
233                         //check for optional parameter and set defaults
234                         // ...at some point should added some additional type checking...
235                         label = validParameter.validFile(parameters, "label", false);                   
236                         if (label == "not found") { label = ""; }
237                         else { 
238                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
239                                 else { allLines = 1;  }
240                         }
241                                 
242                         calc = validParameter.validFile(parameters, "calc", false);                     
243                         if (calc == "not found") { calc = "sobs";  }
244                         else { 
245                                  if (calc == "default")  {  calc = "sobs";  }
246                         }
247                         m->splitAtDash(calc, Estimators);
248                         if (m->inUsersGroups("citation", Estimators)) { 
249                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
250                                 //remove citation from list of calcs
251                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
252                         }
253
254                         string temp;
255                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
256                         m->mothurConvert(temp, freq); 
257                         
258                         temp = validParameter.validFile(parameters, "abund", false);                    if (temp == "not found") { temp = "10"; }
259                         m->mothurConvert(temp, abund); 
260                         
261                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
262                         m->mothurConvert(temp, nIters); 
263                         
264                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
265                         m->setProcessors(temp);
266                         m->mothurConvert(temp, processors);
267                         
268                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
269                         groupMode = m->isTrue(temp);
270                 }
271                 
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "RareFactCommand", "RareFactCommand");
275                 exit(1);
276         }
277 }
278 //**********************************************************************************************************************
279
280 int RareFactCommand::execute(){
281         try {
282         
283                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
284                 
285                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
286                 else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
287                                 
288                 if (m->control_pressed) { return 0; }
289                 
290                 map<int, string> file2Group; //index in outputNames[i] -> group
291                 for (int p = 0; p < inputFileNames.size(); p++) {
292                         
293                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p]));
294                                                 
295                         if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }  m->clearGroups();  return 0; }
296                         
297                         if (inputFileNames.size() > 1) {
298                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
299                         }
300                         int i;
301                         ValidCalculators validCalculator;
302                         
303                           
304                         for (i=0; i<Estimators.size(); i++) {
305                                 if (validCalculator.isValidCalculator("rarefaction", Estimators[i]) == true) { 
306                                         if (Estimators[i] == "sobs") { 
307                                                 rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
308                                                 outputNames.push_back(fileNameRoot+"rarefaction"); outputTypes["rarefaction"].push_back(fileNameRoot+"rarefaction");
309                                         }else if (Estimators[i] == "chao") { 
310                                                 rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
311                                                 outputNames.push_back(fileNameRoot+"r_chao"); outputTypes["r_chao"].push_back(fileNameRoot+"r_chao");
312                                         }else if (Estimators[i] == "ace") { 
313                                                 if(abund < 5)
314                                                         abund = 10;
315                                                 rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
316                                                 outputNames.push_back(fileNameRoot+"r_ace"); outputTypes["r_ace"].push_back(fileNameRoot+"r_ace");
317                                         }else if (Estimators[i] == "jack") { 
318                                                 rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
319                                                 outputNames.push_back(fileNameRoot+"r_jack"); outputTypes["r_jack"].push_back(fileNameRoot+"r_jack");
320                                         }else if (Estimators[i] == "shannon") { 
321                                                 rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
322                                                 outputNames.push_back(fileNameRoot+"r_shannon"); outputTypes["r_shannon"].push_back(fileNameRoot+"r_shannon");
323                                         }else if (Estimators[i] == "shannoneven") { 
324                                                 rDisplays.push_back(new RareDisplay(new ShannonEven(), new ThreeColumnFile(fileNameRoot+"r_shannoneven")));
325                                                 outputNames.push_back(fileNameRoot+"r_shannoneven"); outputTypes["r_shannoneven"].push_back(fileNameRoot+"r_shannoneven");
326                                         }else if (Estimators[i] == "heip") { 
327                                                 rDisplays.push_back(new RareDisplay(new Heip(), new ThreeColumnFile(fileNameRoot+"r_heip")));
328                                                 outputNames.push_back(fileNameRoot+"r_heip"); outputTypes["r_heip"].push_back(fileNameRoot+"r_heip");
329                                         }else if (Estimators[i] == "smithwilson") { 
330                                                 rDisplays.push_back(new RareDisplay(new SmithWilson(), new ThreeColumnFile(fileNameRoot+"r_smithwilson")));
331                                                 outputNames.push_back(fileNameRoot+"r_smithwilson"); outputTypes["r_smithwilson"].push_back(fileNameRoot+"r_smithwilson");
332                                         }else if (Estimators[i] == "npshannon") { 
333                                                 rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
334                                                 outputNames.push_back(fileNameRoot+"r_npshannon"); outputTypes["r_npshannon"].push_back(fileNameRoot+"r_npshannon");
335                                         }else if (Estimators[i] == "simpson") { 
336                                                 rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
337                                                 outputNames.push_back(fileNameRoot+"r_simpson"); outputTypes["r_simpson"].push_back(fileNameRoot+"r_simpson");
338                                         }else if (Estimators[i] == "simpsoneven") { 
339                                                 rDisplays.push_back(new RareDisplay(new SimpsonEven(), new ThreeColumnFile(fileNameRoot+"r_simpsoneven")));
340                                                 outputNames.push_back(fileNameRoot+"r_simpsoneven"); outputTypes["r_simpsoneven"].push_back(fileNameRoot+"r_simpsoneven");
341                                         }else if (Estimators[i] == "invsimpson") { 
342                                                 rDisplays.push_back(new RareDisplay(new InvSimpson(), new ThreeColumnFile(fileNameRoot+"r_invsimpson")));
343                                                 outputNames.push_back(fileNameRoot+"r_invsimpson"); outputTypes["r_invsimpson"].push_back(fileNameRoot+"r_invsimpson");
344                                         }else if (Estimators[i] == "bootstrap") { 
345                                                 rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
346                                                 outputNames.push_back(fileNameRoot+"r_bootstrap"); outputTypes["r_bootstrap"].push_back(fileNameRoot+"r_bootstrap");
347                                         }else if (Estimators[i] == "coverage") { 
348                                                 rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
349                                                 outputNames.push_back(fileNameRoot+"r_coverage"); outputTypes["r_coverage"].push_back(fileNameRoot+"r_coverage");
350                                         }else if (Estimators[i] == "nseqs") { 
351                                                 rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
352                                                 outputNames.push_back(fileNameRoot+"r_nseqs"); outputTypes["r_nseqs"].push_back(fileNameRoot+"r_nseqs");
353                                         }
354                     if (inputFileNames.size() > 1) { file2Group[outputNames.size()-1] = groups[p]; }
355                                 }
356                         }
357                         
358                         
359                         //if the users entered no valid calculators don't execute command
360                         if (rDisplays.size() == 0) { for(int i=0;i<rDisplays.size();i++){       delete rDisplays[i];    }  return 0; }
361                         
362                         input = new InputData(inputFileNames[p], format);                       
363                         order = input->getOrderVector();
364                         string lastLabel = order->getLabel();
365                         
366                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
367                         set<string> processedLabels;
368                         set<string> userLabels = labels;
369                         
370                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
371                         
372                         //as long as you are not at the end of the file or done wih the lines you want
373                         while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
374                                 
375                                 if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;  delete order;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
376
377                                 
378                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
379                                         
380                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
381                                         rCurve = new Rarefact(order, rDisplays, processors);
382                                         rCurve->getCurve(freq, nIters);
383                                         delete rCurve;
384                                         
385                                         processedLabels.insert(order->getLabel());
386                                         userLabels.erase(order->getLabel());
387                                 }
388                                 
389                                 if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
390                                         string saveLabel = order->getLabel();
391                                         
392                                         delete order;
393                                         order = (input->getOrderVector(lastLabel));
394                                         
395                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
396                                         rCurve = new Rarefact(order, rDisplays, processors);
397                                         rCurve->getCurve(freq, nIters);
398                                         delete rCurve;
399                                         
400                                         processedLabels.insert(order->getLabel());
401                                         userLabels.erase(order->getLabel());
402                                         
403                                         //restore real lastlabel to save below
404                                         order->setLabel(saveLabel);
405                                 }
406                                 
407                                 lastLabel = order->getLabel();          
408                                 
409                                 delete order;
410                                 order = (input->getOrderVector());
411                         }
412                         
413                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
414
415                         //output error messages about any remaining user labels
416                         set<string>::iterator it;
417                         bool needToRun = false;
418                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
419                                 m->mothurOut("Your file does not include the label " + *it);
420                                 if (processedLabels.count(lastLabel) != 1) {
421                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
422                                         needToRun = true;
423                                 }else {
424                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
425                                 }
426                         }
427                         
428                         if (m->control_pressed) { for(int i=0;i<rDisplays.size();i++){  delete rDisplays[i];    }  delete input;   for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
429
430                         //run last label if you need to
431                         if (needToRun == true)  {
432                                 if (order != NULL) {    delete order;   }
433                                 order = (input->getOrderVector(lastLabel));
434                                 
435                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
436                                 rCurve = new Rarefact(order, rDisplays, processors);
437                                 rCurve->getCurve(freq, nIters);
438                                 delete rCurve;
439                                 
440                                 delete order;
441                         }
442                         
443                         
444                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
445                         rDisplays.clear();
446                         delete input;  
447                 }
448                 
449                 
450                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
451
452                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
453                 if ((sharedfile != "") && (groupMode)) {   outputNames = createGroupFile(outputNames, file2Group);  }
454
455                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
456
457                 m->mothurOutEndLine();
458                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
459                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
460                 m->mothurOutEndLine();
461
462                 return 0;
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "RareFactCommand", "execute");
466                 exit(1);
467         }
468 }
469 //**********************************************************************************************************************
470 vector<string> RareFactCommand::createGroupFile(vector<string>& outputNames, map<int, string> file2Group) {
471         try {
472                 
473                 vector<string> newFileNames;
474                 
475                 //find different types of files
476                 map<string, map<string, string> > typesFiles;
477                 for (int i = 0; i < outputNames.size(); i++) {
478                         string extension = m->getExtension(outputNames[i]);
479                         
480                         ifstream in;
481                         m->openInputFile(outputNames[i], in);
482                         
483                         string labels = m->getline(in);
484                         string newLine = labels.substr(0, labels.find_first_of('\t'));
485                         
486                         newLine += "\tGroup" + labels.substr(labels.find_first_of('\t'));
487             
488             map<string, map<string, string> >::iterator itfind = typesFiles.find(extension);
489             if (itfind != typesFiles.end()) {
490                 (itfind->second)[outputNames[i]] = file2Group[i];
491             }else {
492                 map<string, string> temp;  
493                 temp[outputNames[i]] = file2Group[i];
494                 typesFiles[extension] = temp;
495             }
496                         
497                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
498                         
499                         //print headers
500                         ofstream out;
501                         m->openOutputFile(combineFileName, out);
502                         out << newLine << endl;
503                         out.close();
504                         
505                 }
506                 
507                 //for each type create a combo file
508                 map<int, int> lineToNumber; 
509                 for (map<string, map<string, string> >::iterator it = typesFiles.begin(); it != typesFiles.end(); it++) {
510                         
511                         ofstream out;
512                         string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + it->first;
513                         m->openOutputFileAppend(combineFileName, out);
514                         newFileNames.push_back(combineFileName);
515                         map<string, string> thisTypesFiles = it->second;
516                 
517                         //open each type summary file
518                         map<string, vector<string> > files; //maps file name to lines in file
519                         int maxLines = 0;
520                         int numColumns = 0;
521                         for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
522                 
523                 string thisfilename = itFileNameGroup->first;
524                 string group = itFileNameGroup->second;
525                
526                                 ifstream temp;
527                                 m->openInputFile(thisfilename, temp);
528                                 
529                                 //read through first line - labels
530                                 m->getline(temp);       m->gobble(temp);
531                                 
532                                 vector<string> thisFilesLines;
533                                 
534                                 thisFilesLines.push_back(group);
535                                 int count = 1;
536                                 while (!temp.eof()){
537                                 
538                                         string thisLine = m->getline(temp);
539                                         
540                                         string numSampled = thisLine.substr(0, thisLine.find_first_of('\t'));
541                                         int num = 0;
542                                         convert(numSampled, num);
543                                         numColumns = m->getNumChar(thisLine, '\t');
544                                         lineToNumber[count] = num;
545                                         count++;
546                                                                         
547                                         thisFilesLines.push_back(thisLine);
548                                         m->gobble(temp);
549                                 }
550                                 
551                                 files[thisfilename] = thisFilesLines;
552                                 
553                                 //save longest file for below
554                                 if (maxLines < thisFilesLines.size()) { maxLines = thisFilesLines.size(); }
555                                 
556                                 temp.close();
557                                 m->mothurRemove(thisfilename);
558                         }
559                         
560                         
561                         //for each label
562                         for (int k = 1; k < maxLines; k++) {
563                                 
564                                 //grab data for each group
565                                 for (map<string, string>::iterator itFileNameGroup = thisTypesFiles.begin(); itFileNameGroup != thisTypesFiles.end(); itFileNameGroup++) {
566                     
567                                         string thisfilename = itFileNameGroup->first;
568                                         map<int, int>::iterator itLine = lineToNumber.find(k);
569                                         if (itLine != lineToNumber.end()) {
570                                                 string output = toString(itLine->second);
571                                                 if (k < files[thisfilename].size()) {
572                                                         string line = files[thisfilename][k];
573                                                         output = line.substr(0, line.find_first_of('\t'));
574                                                         output += '\t' + files[thisfilename][0] + '\t' + line.substr(line.find_first_of('\t'));
575                                                 }else{
576                                                         output += '\t' + files[thisfilename][0] + '\t';
577                                                         for (int h = 0; h < numColumns; h++) {
578                                                                 output += "NA\t";
579                                                         }
580                                                 }
581                                                 out << output << endl;
582                                         }else { m->mothurOut("[ERROR]: parsing results, cant find " + toString(k)); m->mothurOutEndLine(); }
583                                 }
584                         }       
585                         
586                         out.close();
587                         
588                 }
589                 
590                 //return combine file name
591                 return newFileNames;
592                 
593         }
594         catch(exception& e) {
595                 m->errorOut(e, "RareFactCommand", "createGroupFile");
596                 exit(1);
597         }
598 }
599 //**********************************************************************************************************************
600 vector<string> RareFactCommand::parseSharedFile(string filename) {
601         try {
602                 vector<string> filenames;
603                 
604                 map<string, ofstream*> filehandles;
605                 map<string, ofstream*>::iterator it3;
606                 
607                 input = new InputData(filename, "sharedfile");
608                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
609                 
610                 string sharedFileRoot = m->getRootName(filename);
611                 
612                 //clears file before we start to write to it below
613                 for (int i=0; i<lookup.size(); i++) {
614                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
615                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
616                 }
617                 
618                 ofstream* temp;
619                 for (int i=0; i<lookup.size(); i++) {
620                         temp = new ofstream;
621                         filehandles[lookup[i]->getGroup()] = temp;
622                         groups.push_back(lookup[i]->getGroup());
623                 }
624
625                 while(lookup[0] != NULL) {
626                 
627                         for (int i = 0; i < lookup.size(); i++) {
628                                 RAbundVector rav = lookup[i]->getRAbundVector();
629                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
630                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
631                                 (*(filehandles[lookup[i]->getGroup()])).close();
632                         }
633                 
634                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
635                         lookup = input->getSharedRAbundVectors();
636                 }
637                 
638                 //free memory
639                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
640                         delete it3->second;
641                 }
642                 
643                 delete input;
644                 m->clearGroups();
645
646                 return filenames;
647         }
648         catch(exception& e) {
649                 m->errorOut(e, "RareFactCommand", "parseSharedFile");
650                 exit(1);
651         }
652 }
653 //**********************************************************************************************************************
654
655
656