]> git.donarmstrong.com Git - mothur.git/blob - summarycommand.cpp
85f0970f25930f563616c5fba9bc27fecda9cf98
[mothur.git] / summarycommand.cpp
1 /*
2  *  summarycommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarycommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "invsimpson.h"
19 #include "npshannon.h"
20 #include "shannon.h"
21 #include "heip.h"
22 #include "smithwilson.h"
23 #include "shannoneven.h"
24 #include "jackknife.h"
25 #include "geom.h"
26 #include "logsd.h"
27 #include "qstat.h"
28 #include "bergerparker.h"
29 #include "bstick.h"
30 #include "goodscoverage.h"
31 #include "coverage.h"
32 #include "efron.h"
33 #include "boneh.h"
34 #include "solow.h"
35 #include "shen.h"
36 #include "subsample.h"
37
38 //**********************************************************************************************************************
39 vector<string> SummaryCommand::setParameters(){ 
40         try {
41                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
42                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
43                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
44                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
45         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
46         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
47                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
48                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "",true,false); parameters.push_back(pcalc);
49                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
50                 CommandParameter psize("size", "Number", "", "0", "", "", "",false,false); parameters.push_back(psize);
51                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
52                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
53                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
54                 
55                 vector<string> myArray;
56                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "SummaryCommand", "setParameters");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string SummaryCommand::getHelpString(){ 
66         try {
67                 string helpString = "";
68                 ValidCalculators validCalculator;
69                 helpString += "The summary.single command parameters are list, sabund, rabund, shared, subsample, iters, label, calc, abund and groupmode.  list, sabund, rabund or shared is required unless you have a valid current file.\n";
70                 helpString += "The summary.single command should be in the following format: \n";
71                 helpString += "summary.single(label=yourLabel, calc=yourEstimators).\n";
72                 helpString += "Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n";
73                 helpString += validCalculator.printCalc("summary");
74         helpString += "The subsample parameter allows you to enter the size of the sample or you can set subsample=T and mothur will use the size of your smallest group in the case of a shared file. With a list, sabund or rabund file you must provide a subsample size.\n";
75         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
76                 helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n";
77                 helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n";
78                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
79                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n";
80                 return helpString;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "SummaryCommand", "getHelpString");
84                 exit(1);
85         }
86 }
87
88 //**********************************************************************************************************************
89 SummaryCommand::SummaryCommand(){       
90         try {
91                 abort = true; calledHelp = true; 
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["summary"] = tempOutNames;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
98                 exit(1);
99         }
100 }
101 //**********************************************************************************************************************
102
103 SummaryCommand::SummaryCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 allLines = 1;
107                                 
108                 //allow user to run help
109                 if(option == "help") {  help();  abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         map<string,string>::iterator it;
118                         
119                         ValidParameters validParameter;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["summary"] = tempOutNames;
129                         
130                         //if the user changes the input directory command factory will send this info to us in the output parameter 
131                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
132                         if (inputDir == "not found"){   inputDir = "";          }
133                         else {
134                                 string path;
135                                 it = parameters.find("shared");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
141                                 }
142                                 
143                                 it = parameters.find("rabund");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
149                                 }
150                                 
151                                 it = parameters.find("sabund");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
157                                 }
158                                 
159                                 it = parameters.find("list");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
165                                 }
166                         }
167                         
168                         //check for required parameters
169                         listfile = validParameter.validFile(parameters, "list", true);
170                         if (listfile == "not open") { listfile = ""; abort = true; }
171                         else if (listfile == "not found") { listfile = ""; }
172                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
173                         
174                         sabundfile = validParameter.validFile(parameters, "sabund", true);
175                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
176                         else if (sabundfile == "not found") { sabundfile = ""; }
177                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
178                         
179                         rabundfile = validParameter.validFile(parameters, "rabund", true);
180                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
181                         else if (rabundfile == "not found") { rabundfile = ""; }
182                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
183                         
184                         sharedfile = validParameter.validFile(parameters, "shared", true);
185                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
186                         else if (sharedfile == "not found") { sharedfile = ""; }
187                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
188                         
189                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
190                                 //is there are current file available for any of these?
191                                 //give priority to shared, then list, then rabund, then sabund
192                                 //if there is a current shared file, use it
193                                 sharedfile = m->getSharedFile(); 
194                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
195                                 else { 
196                                         listfile = m->getListFile(); 
197                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
198                                         else { 
199                                                 rabundfile = m->getRabundFile(); 
200                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
201                                                 else { 
202                                                         sabundfile = m->getSabundFile(); 
203                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
204                                                         else { 
205                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
206                                                                 abort = true;
207                                                         }
208                                                 }
209                                         }
210                                 }
211                         }
212                         
213                         //if the user changes the output directory command factory will send this info to us in the output parameter 
214                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
215
216                         //check for optional parameter and set defaults
217                         // ...at some point should added some additional type checking...
218                         label = validParameter.validFile(parameters, "label", false);                   
219                         if (label == "not found") { label = ""; }
220                         else { 
221                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
222                                 else { allLines = 1;  }
223                         }
224                                 
225                         calc = validParameter.validFile(parameters, "calc", false);                     
226                         if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
227                         else { 
228                                  if (calc == "default")  {  calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
229                         }
230                         m->splitAtDash(calc, Estimators);
231                         if (m->inUsersGroups("citation", Estimators)) { 
232                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
233                                 //remove citation from list of calcs
234                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
235                         }
236
237                         string temp;
238                         temp = validParameter.validFile(parameters, "abund", false);            if (temp == "not found") { temp = "10"; }
239                         m->mothurConvert(temp, abund); 
240                         
241                         temp = validParameter.validFile(parameters, "size", false);                     if (temp == "not found") { temp = "0"; }
242                         m->mothurConvert(temp, size); 
243                         
244                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
245                         groupMode = m->isTrue(temp);
246                         
247             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
248                         m->mothurConvert(temp, iters);
249             
250             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
251                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
252             else {  
253                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
254                 else { subsample = false; subsampleSize = -1; }
255             }
256             
257             if (subsample == false) { iters = 1; }
258             else {
259                 //if you did not set a samplesize and are not using a sharedfile
260                 if ((subsampleSize == -1) && (format != "sharedfile"))  { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size.  You can do this by setting subsample=yourSampleSize.\n");  abort=true; }
261             }
262
263                 }
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
267                 exit(1);
268         }
269 }
270 //**********************************************************************************************************************
271
272 int SummaryCommand::execute(){
273         try {
274         
275                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
276                 
277                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
278                 else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
279                 
280                 if (m->control_pressed) { return 0; }
281                 
282                 int numLines = 0;
283                 int numCols = 0;
284                 map<string, string> groupIndex;
285         
286                 for (int p = 0; p < inputFileNames.size(); p++) {
287                         
288                         numLines = 0;
289                         numCols = 0;
290                         
291                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "summary";
292             string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave";
293             string fileNameSTD = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "std";
294                         outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
295             
296             
297                         
298                         if (inputFileNames.size() > 1) {
299                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
300                 groupIndex[fileNameRoot] = groups[p];
301                         }
302                         
303                         sumCalculators.clear();
304                         
305                         ValidCalculators validCalculator;
306                         
307                         for (int i=0; i<Estimators.size(); i++) {
308                                 if (validCalculator.isValidCalculator("summary", Estimators[i]) == true) { 
309                                         if(Estimators[i] == "sobs"){
310                                                 sumCalculators.push_back(new Sobs());
311                                         }else if(Estimators[i] == "chao"){
312                                                 sumCalculators.push_back(new Chao1());
313                                         }else if(Estimators[i] == "coverage"){
314                                                 sumCalculators.push_back(new Coverage());
315                                         }else if(Estimators[i] == "geometric"){
316                                                 sumCalculators.push_back(new Geom());
317                                         }else if(Estimators[i] == "logseries"){
318                                                 sumCalculators.push_back(new LogSD());
319                                         }else if(Estimators[i] == "qstat"){
320                                                 sumCalculators.push_back(new QStat());
321                                         }else if(Estimators[i] == "bergerparker"){
322                                                 sumCalculators.push_back(new BergerParker());
323                                         }else if(Estimators[i] == "bstick"){
324                                                 sumCalculators.push_back(new BStick());
325                                         }else if(Estimators[i] == "ace"){
326                                                 if(abund < 5)
327                                                         abund = 10;
328                                                 sumCalculators.push_back(new Ace(abund));
329                                         }else if(Estimators[i] == "jack"){
330                                                 sumCalculators.push_back(new Jackknife());
331                                         }else if(Estimators[i] == "shannon"){
332                                                 sumCalculators.push_back(new Shannon());
333                                         }else if(Estimators[i] == "shannoneven"){
334                                                 sumCalculators.push_back(new ShannonEven());
335                                         }else if(Estimators[i] == "npshannon"){
336                                                 sumCalculators.push_back(new NPShannon());
337                                         }else if(Estimators[i] == "heip"){
338                                                 sumCalculators.push_back(new Heip());
339                                         }else if(Estimators[i] == "smithwilson"){
340                                                 sumCalculators.push_back(new SmithWilson());
341                                         }else if(Estimators[i] == "simpson"){
342                                                 sumCalculators.push_back(new Simpson());
343                                         }else if(Estimators[i] == "simpsoneven"){
344                                                 sumCalculators.push_back(new SimpsonEven());
345                                         }else if(Estimators[i] == "invsimpson"){
346                                                 sumCalculators.push_back(new InvSimpson());
347                                         }else if(Estimators[i] == "bootstrap"){
348                                                 sumCalculators.push_back(new Bootstrap());
349                                         }else if (Estimators[i] == "nseqs") { 
350                                                 sumCalculators.push_back(new NSeqs());
351                                         }else if (Estimators[i] == "goodscoverage") { 
352                                                 sumCalculators.push_back(new GoodsCoverage());
353                                         }else if (Estimators[i] == "efron") { 
354                                                 sumCalculators.push_back(new Efron(size));
355                                         }else if (Estimators[i] == "boneh") { 
356                                                 sumCalculators.push_back(new Boneh(size));
357                                         }else if (Estimators[i] == "solow") { 
358                                                 sumCalculators.push_back(new Solow(size));
359                                         }else if (Estimators[i] == "shen") { 
360                                                 sumCalculators.push_back(new Shen(size, abund));
361                                         }
362                                 }
363                         }
364                         
365                         //if the users entered no valid calculators don't execute command
366                         if (sumCalculators.size() == 0) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
367                         
368                         ofstream outputFileHandle;
369                         m->openOutputFile(fileNameRoot, outputFileHandle);
370                         outputFileHandle << "label";
371             
372             ofstream outAve, outSTD;
373             if (subsample) {
374                 m->openOutputFile(fileNameAve, outAve);
375                 m->openOutputFile(fileNameSTD, outSTD);
376                 outputNames.push_back(fileNameAve); outputTypes["ave"].push_back(fileNameAve);
377                 outputNames.push_back(fileNameSTD); outputTypes["std"].push_back(fileNameSTD);
378                 outAve << "label"; outSTD << "label";
379                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
380                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
381                 if (inputFileNames.size() > 1) {
382                     groupIndex[fileNameAve] = groups[p];
383                     groupIndex[fileNameSTD] = groups[p];
384                 }
385             }
386                 
387                         input = new InputData(inputFileNames[p], format);
388                         sabund = input->getSAbundVector();
389                         string lastLabel = sabund->getLabel();
390                 
391                         for(int i=0;i<sumCalculators.size();i++){
392                                 if(sumCalculators[i]->getCols() == 1){
393                                         outputFileHandle << '\t' << sumCalculators[i]->getName();
394                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName(); outSTD << '\t' << sumCalculators[i]->getName(); }
395                                         numCols++;
396                                 }
397                                 else{
398                                         outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
399                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; outSTD << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; }
400                                         numCols += 3;
401                                 }
402                         }
403                         outputFileHandle << endl;
404             if (subsample) { outSTD << endl; outAve << endl; }
405                         
406                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
407                         set<string> processedLabels;
408                         set<string> userLabels = labels;
409                         
410             
411             
412                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
413                         
414                         while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
415                                 
416                                 if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
417                                 
418                                 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){                     
419                                         
420                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
421                                         processedLabels.insert(sabund->getLabel());
422                                         userLabels.erase(sabund->getLabel());
423                                         
424                     process(sabund, outputFileHandle, outAve, outSTD);
425                     
426                     if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
427                                         numLines++;
428                                 }
429                                 
430                                 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
431                                         string saveLabel = sabund->getLabel();
432                                         
433                                         delete sabund;
434                                         sabund = input->getSAbundVector(lastLabel);
435                                         
436                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
437                                         processedLabels.insert(sabund->getLabel());
438                                         userLabels.erase(sabund->getLabel());
439                                         
440                     process(sabund, outputFileHandle, outAve, outSTD);
441                     
442                     if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
443                                         numLines++;
444                                         
445                                         //restore real lastlabel to save below
446                                         sabund->setLabel(saveLabel);
447                                 }               
448                                 
449                                 lastLabel = sabund->getLabel();                 
450                                 
451                                 delete sabund;
452                                 sabund = input->getSAbundVector();
453                         }
454                         
455                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
456
457                         //output error messages about any remaining user labels
458                         set<string>::iterator it;
459                         bool needToRun = false;
460                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
461                                 m->mothurOut("Your file does not include the label " + *it); 
462                                 if (processedLabels.count(lastLabel) != 1) {
463                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
464                                         needToRun = true;
465                                 }else {
466                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
467                                 }
468                         }
469                         
470                         //run last label if you need to
471                         if (needToRun == true)  {
472                                 if (sabund != NULL) {   delete sabund;  }
473                                 sabund = input->getSAbundVector(lastLabel);
474                                 
475                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
476                 process(sabund, outputFileHandle, outAve, outSTD);
477                 
478                 if (m->control_pressed) { outputFileHandle.close(); outAve.close(); outSTD.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
479                                 numLines++;
480                                 delete sabund;
481                         }
482                         
483                         outputFileHandle.close();
484             if (subsample) { outAve.close(); outSTD.close(); }
485                         
486                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
487
488                         
489                         delete input;  
490                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
491                 }
492                 
493                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
494                 
495                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
496                 if ((sharedfile != "") && (groupMode)) {   vector<string> comboNames = createGroupSummaryFile(numLines, numCols, outputNames, groupIndex);  for (int i = 0; i < comboNames.size(); i++) { outputNames.push_back(comboNames[i]); } }
497                 
498                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
499                 
500                 m->mothurOutEndLine();
501                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
502                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
503                 m->mothurOutEndLine();
504                 
505                 return 0;
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "SummaryCommand", "execute");
509                 exit(1);
510         }
511 }
512 //**********************************************************************************************************************
513 int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve, ofstream& outStd) {
514     try {
515         
516         //calculator -> data -> values
517         vector< vector< vector<double> > >  results; results.resize(sumCalculators.size());
518         
519         outputFileHandle << sabund->getLabel();
520         
521         SubSample sample;
522         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
523             
524             SAbundVector* thisIterSabund = sabund;
525             
526             //we want the summary results for the whole dataset, then the subsampling
527             if ((thisIter > 0) && subsample) { //subsample sabund and run it
528                 //copy sabund since getSample destroys it
529                 RAbundVector rabund = sabund->getRAbundVector();
530                 SAbundVector* newSabund = new SAbundVector();
531                 *newSabund = rabund.getSAbundVector();
532                 
533                 sample.getSample(newSabund, subsampleSize);
534                 thisIterSabund = newSabund;
535             }
536             
537             for(int i=0;i<sumCalculators.size();i++){
538                 vector<double> data = sumCalculators[i]->getValues(thisIterSabund);
539                
540                 if (m->control_pressed) {  return 0;  }
541                 
542                 if (thisIter == 0) {
543                     outputFileHandle << '\t';
544                     sumCalculators[i]->print(outputFileHandle);
545                 }else {
546                     //some of the calc have hci and lci need to make room for that
547                     if (results[i].size() == 0) {  results[i].resize(data.size());  }
548                     //save results for ave and std.
549                     for (int j = 0; j < data.size(); j++) {
550                         if (m->control_pressed) {  return 0;  }
551                         results[i][j].push_back(data[j]); 
552                     }
553                 }
554             }
555             
556             //cleanup memory
557             if ((thisIter > 0) && subsample) { delete thisIterSabund; }
558         }
559         outputFileHandle << endl;
560      
561         if (subsample) {
562             outAve << sabund->getLabel() << '\t'; outStd << sabund->getLabel() << '\t';
563             //find ave and std for this label and output
564             //will need to modify the createGroupSummary to combine results and not mess with the .summary file.
565             
566             //calcs -> values
567             vector< vector<double> >  calcAverages; calcAverages.resize(sumCalculators.size()); 
568             for (int i = 0; i < calcAverages.size(); i++) {  calcAverages[i].resize(results[i].size(), 0);  }
569             
570             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
571                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
572                     for (int j = 0; j < calcAverages[i].size(); j++) {
573                         calcAverages[i][j] += results[i][j][thisIter];
574                     }
575                 }
576             }
577             
578             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
579                 for (int j = 0; j < calcAverages[i].size(); j++) {
580                     calcAverages[i][j] /= (float) iters;
581                     outAve << calcAverages[i][j] << '\t';
582                 }
583             }
584             
585             //find standard deviation
586             vector< vector<double>  > stdDev; stdDev.resize(sumCalculators.size());
587             for (int i = 0; i < stdDev.size(); i++) {  stdDev[i].resize(results[i].size(), 0);  }
588             
589             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
590                 for (int i = 0; i < stdDev.size(); i++) {  
591                     for (int j = 0; j < stdDev[i].size(); j++) {
592                         stdDev[i][j] += ((results[i][j][thisIter] - calcAverages[i][j]) * (results[i][j][thisIter] - calcAverages[i][j]));
593                     }
594                 }
595             }
596             
597             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
598                 for (int j = 0; j < stdDev[i].size(); j++) {
599                     stdDev[i][j] /= (float) iters;
600                     stdDev[i][j] = sqrt(stdDev[i][j]);
601                     outStd << stdDev[i][j] << '\t';
602                 }
603             }
604             outAve << endl;  outStd << endl; 
605         }
606         
607         return 0;
608     }
609     catch(exception& e) {
610         m->errorOut(e, "SummaryCommand", "process");
611         exit(1);
612     }
613 }
614 //**********************************************************************************************************************
615 vector<string> SummaryCommand::parseSharedFile(string filename) {
616         try {
617                 vector<string> filenames;
618                 
619                 map<string, ofstream*> filehandles;
620                 map<string, ofstream*>::iterator it3;
621                 
622                 input = new InputData(filename, "sharedfile");
623                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
624                 
625                 string sharedFileRoot = m->getRootName(filename);
626                 
627         /******************************************************/
628         if (subsample) { 
629             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
630                 subsampleSize = lookup[0]->getNumSeqs();
631                 for (int i = 1; i < lookup.size(); i++) {
632                     int thisSize = lookup[i]->getNumSeqs();
633                     
634                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
635                 }
636             }else {
637                 m->clearGroups();
638                 vector<string> Groups;
639                 vector<SharedRAbundVector*> temp;
640                 for (int i = 0; i < lookup.size(); i++) {
641                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
642                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
643                         delete lookup[i];
644                     }else { 
645                         Groups.push_back(lookup[i]->getGroup()); 
646                         temp.push_back(lookup[i]);
647                     }
648                 } 
649                 lookup = temp;
650                 m->setGroups(Groups);
651             }
652             
653             if (lookup.size() < 1) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return filenames; }
654         }
655         
656                 
657                 /******************************************************/
658         
659         //clears file before we start to write to it below
660                 for (int i=0; i<lookup.size(); i++) {
661                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
662                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
663                 }
664         
665                 ofstream* temp;
666                 for (int i=0; i<lookup.size(); i++) {
667                         temp = new ofstream;
668                         filehandles[lookup[i]->getGroup()] = temp;
669                         groups.push_back(lookup[i]->getGroup());
670                 }
671
672                 while(lookup[0] != NULL) {
673                 
674                         for (int i = 0; i < lookup.size(); i++) {
675                                 RAbundVector rav = lookup[i]->getRAbundVector();
676                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
677                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
678                                 (*(filehandles[lookup[i]->getGroup()])).close();
679                         }
680                 
681                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
682                         lookup = input->getSharedRAbundVectors();
683                 }
684                 
685                 //free memory
686                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
687                         delete it3->second;
688                 }
689                 
690                 delete input;
691
692                 return filenames;
693         }
694         catch(exception& e) {
695                 m->errorOut(e, "SummaryCommand", "parseSharedFile");
696                 exit(1);
697         }
698 }
699 //**********************************************************************************************************************
700 vector<string> SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames, map<string, string> groupIndex) {
701         try {
702                                 
703                 //open each groups summary file
704         vector<string> newComboNames;
705                 string newLabel = "";
706                 map<string, map<string, vector<string> > > files;
707                 for (int i=0; i<outputNames.size(); i++) {
708             string extension = m->getExtension(outputNames[i]);
709             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
710                         m->mothurRemove(combineFileName); //remove old file
711              
712                         vector<string> thisFilesLines;
713             
714                         ifstream temp;
715                         m->openInputFile(outputNames[i], temp);
716                         
717                         //read through first line - labels
718                         string tempLabel;
719                         if (i == 0) { //we want to save the labels to output below
720                                 for (int j = 0; j < numCols+1; j++) {  
721                                         temp >> tempLabel; 
722                                         
723                                         if (j == 1) {  newLabel += "group\t" + tempLabel + '\t';
724                                         }else{  newLabel += tempLabel + '\t';   }
725                                 }
726                         }else{  for (int j = 0; j < numCols+1; j++) {  temp >> tempLabel;  }  }
727                         
728                         m->gobble(temp);
729                         
730                         //for each label
731                         for (int k = 0; k < numLines; k++) {
732                                 
733                                 string thisLine = "";
734                                 string tempLabel;
735                                         
736                                 for (int j = 0; j < numCols+1; j++) {  
737                                         temp >> tempLabel; 
738                                                 
739                                         //save for later
740                                         if (j == 1) { thisLine += groupIndex[outputNames[i]] + "\t" + tempLabel + "\t"; }
741                                         else{  thisLine += tempLabel + "\t";    }
742                                 }
743                                         
744                                 thisLine += "\n";
745                                 
746                                 thisFilesLines.push_back(thisLine);
747                                         
748                                 m->gobble(temp);
749                         }
750             
751             map<string, map<string, vector<string> > >::iterator itFiles = files.find(extension);
752             if (itFiles != files.end()) { //add new files info to existing type
753                 files[extension][outputNames[i]] = thisFilesLines;
754             }else {
755                 map<string, vector<string> > thisFile;
756                 thisFile[outputNames[i]] = thisFilesLines;
757                 files[extension] = thisFile;
758             }
759                         
760                         temp.close();
761                         m->mothurRemove(outputNames[i]);
762                 }
763                 
764         
765         for (map<string, map<string, vector<string> > >::iterator itFiles = files.begin(); itFiles != files.end(); itFiles++) {
766             
767             if (m->control_pressed) { break; }
768             
769             string extension = itFiles->first;
770             map<string, vector<string> > thisType = itFiles->second;
771             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
772             newComboNames.push_back(combineFileName);
773             //open combined file
774             ofstream out;
775             m->openOutputFile(combineFileName, out);
776             
777             //output label line to new file
778             out << newLabel << endl;
779                 
780             //for each label
781             for (int k = 0; k < numLines; k++) {
782                 
783                 //grab summary data for each group
784                 for (map<string, vector<string> >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {
785                     out << (itType->second)[k];
786                 }
787             }   
788                 
789             outputNames.clear();
790                 
791             out.close();
792         }
793                 
794                 //return combine file name
795                 return newComboNames;
796                 
797         }
798         catch(exception& e) {
799                 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
800                 exit(1);
801         }
802 }
803 //**********************************************************************************************************************