]> git.donarmstrong.com Git - mothur.git/blob - summarycommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / summarycommand.cpp
1 /*
2  *  summarycommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarycommand.h"
11 #include "ace.h"
12 #include "sobs.h"
13 #include "nseqs.h"
14 #include "chao1.h"
15 #include "bootstrap.h"
16 #include "simpson.h"
17 #include "simpsoneven.h"
18 #include "invsimpson.h"
19 #include "npshannon.h"
20 #include "shannon.h"
21 #include "heip.h"
22 #include "smithwilson.h"
23 #include "shannoneven.h"
24 #include "jackknife.h"
25 #include "geom.h"
26 #include "logsd.h"
27 #include "qstat.h"
28 #include "bergerparker.h"
29 #include "bstick.h"
30 #include "goodscoverage.h"
31 #include "coverage.h"
32 #include "efron.h"
33 #include "boneh.h"
34 #include "solow.h"
35 #include "shen.h"
36 #include "subsample.h"
37
38 //**********************************************************************************************************************
39 vector<string> SummaryCommand::setParameters(){ 
40         try {
41                 CommandParameter plist("list", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(plist);
42                 CommandParameter prabund("rabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prabund);
43                 CommandParameter psabund("sabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(psabund);
44                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);
45         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
46         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
47                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
48                 CommandParameter pcalc("calc", "Multiple", "sobs-chao-nseqs-coverage-ace-jack-shannon-shannoneven-npshannon-heip-smithwilson-simpson-simpsoneven-invsimpson-bootstrap-geometric-qstat-logseries-bergerparker-bstick-goodscoverage-efron-boneh-solow-shen", "sobs-chao-ace-jack-shannon-npshannon-simpson", "", "", "",true,false); parameters.push_back(pcalc);
49                 CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund);
50                 CommandParameter psize("size", "Number", "", "0", "", "", "",false,false); parameters.push_back(psize);
51                 CommandParameter pgroupmode("groupmode", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pgroupmode);
52                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
53                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
54                 
55                 vector<string> myArray;
56                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "SummaryCommand", "setParameters");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string SummaryCommand::getHelpString(){ 
66         try {
67                 string helpString = "";
68                 ValidCalculators validCalculator;
69                 helpString += "The summary.single command parameters are list, sabund, rabund, shared, subsample, iters, label, calc, abund and groupmode.  list, sabund, rabund or shared is required unless you have a valid current file.\n";
70                 helpString += "The summary.single command should be in the following format: \n";
71                 helpString += "summary.single(label=yourLabel, calc=yourEstimators).\n";
72                 helpString += "Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n";
73                 helpString += validCalculator.printCalc("summary");
74         helpString += "The subsample parameter allows you to enter the size of the sample or you can set subsample=T and mothur will use the size of your smallest group in the case of a shared file. With a list, sabund or rabund file you must provide a subsample size.\n";
75         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
76                 helpString += "The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n";
77                 helpString += "If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n";
78                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
79                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n";
80                 return helpString;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "SummaryCommand", "getHelpString");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 string SummaryCommand::getOutputFileNameTag(string type, string inputName=""){  
89         try {
90         string outputFileName = "";
91                 map<string, vector<string> >::iterator it;
92         
93         //is this a type this command creates
94         it = outputTypes.find(type);
95         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
96         else {
97             if (type == "summary")            {   outputFileName =  "summary";   }
98             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
99         }
100         return outputFileName;
101         }
102         catch(exception& e) {
103                 m->errorOut(e, "SummaryCommand", "getOutputFileNameTag");
104                 exit(1);
105         }
106 }
107 //**********************************************************************************************************************
108 SummaryCommand::SummaryCommand(){       
109         try {
110                 abort = true; calledHelp = true; 
111                 setParameters();
112                 vector<string> tempOutNames;
113                 outputTypes["summary"] = tempOutNames;
114         }
115         catch(exception& e) {
116                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
117                 exit(1);
118         }
119 }
120 //**********************************************************************************************************************
121
122 SummaryCommand::SummaryCommand(string option)  {
123         try {
124                 abort = false; calledHelp = false;   
125                 allLines = 1;
126                                 
127                 //allow user to run help
128                 if(option == "help") {  help();  abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         map<string,string>::iterator it;
137                         
138                         ValidParameters validParameter;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["summary"] = tempOutNames;
148                         
149                         //if the user changes the input directory command factory will send this info to us in the output parameter 
150                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
151                         if (inputDir == "not found"){   inputDir = "";          }
152                         else {
153                                 string path;
154                                 it = parameters.find("shared");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
160                                 }
161                                 
162                                 it = parameters.find("rabund");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
168                                 }
169                                 
170                                 it = parameters.find("sabund");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
176                                 }
177                                 
178                                 it = parameters.find("list");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
184                                 }
185                         }
186                         
187                         //check for required parameters
188                         listfile = validParameter.validFile(parameters, "list", true);
189                         if (listfile == "not open") { listfile = ""; abort = true; }
190                         else if (listfile == "not found") { listfile = ""; }
191                         else {  format = "list"; inputfile = listfile; m->setListFile(listfile); }
192                         
193                         sabundfile = validParameter.validFile(parameters, "sabund", true);
194                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
195                         else if (sabundfile == "not found") { sabundfile = ""; }
196                         else {  format = "sabund"; inputfile = sabundfile; m->setSabundFile(sabundfile); }
197                         
198                         rabundfile = validParameter.validFile(parameters, "rabund", true);
199                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
200                         else if (rabundfile == "not found") { rabundfile = ""; }
201                         else {  format = "rabund"; inputfile = rabundfile; m->setRabundFile(rabundfile); }
202                         
203                         sharedfile = validParameter.validFile(parameters, "shared", true);
204                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
205                         else if (sharedfile == "not found") { sharedfile = ""; }
206                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
207                         
208                         if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) { 
209                                 //is there are current file available for any of these?
210                                 //give priority to shared, then list, then rabund, then sabund
211                                 //if there is a current shared file, use it
212                                 sharedfile = m->getSharedFile(); 
213                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
214                                 else { 
215                                         listfile = m->getListFile(); 
216                                         if (listfile != "") { inputfile = listfile; format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
217                                         else { 
218                                                 rabundfile = m->getRabundFile(); 
219                                                 if (rabundfile != "") { inputfile = rabundfile; format = "rabund"; m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
220                                                 else { 
221                                                         sabundfile = m->getSabundFile(); 
222                                                         if (sabundfile != "") { inputfile = sabundfile; format = "sabund"; m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
223                                                         else { 
224                                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file before you can use the collect.single command."); m->mothurOutEndLine(); 
225                                                                 abort = true;
226                                                         }
227                                                 }
228                                         }
229                                 }
230                         }
231                         
232                         //if the user changes the output directory command factory will send this info to us in the output parameter 
233                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
234
235                         //check for optional parameter and set defaults
236                         // ...at some point should added some additional type checking...
237                         label = validParameter.validFile(parameters, "label", false);                   
238                         if (label == "not found") { label = ""; }
239                         else { 
240                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
241                                 else { allLines = 1;  }
242                         }
243                                 
244                         calc = validParameter.validFile(parameters, "calc", false);                     
245                         if (calc == "not found") { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
246                         else { 
247                                  if (calc == "default")  {  calc = "sobs-chao-ace-jack-shannon-npshannon-simpson";  }
248                         }
249                         m->splitAtDash(calc, Estimators);
250                         if (m->inUsersGroups("citation", Estimators)) { 
251                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
252                                 //remove citation from list of calcs
253                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
254                         }
255
256                         string temp;
257                         temp = validParameter.validFile(parameters, "abund", false);            if (temp == "not found") { temp = "10"; }
258                         m->mothurConvert(temp, abund); 
259                         
260                         temp = validParameter.validFile(parameters, "size", false);                     if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, size); 
262                         
263                         temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
264                         groupMode = m->isTrue(temp);
265                         
266             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
267                         m->mothurConvert(temp, iters);
268             
269             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
270                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
271             else {  
272                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
273                 else { subsample = false; subsampleSize = -1; }
274             }
275             
276             if (subsample == false) { iters = 1; }
277             else {
278                 //if you did not set a samplesize and are not using a sharedfile
279                 if ((subsampleSize == -1) && (format != "sharedfile"))  { m->mothurOut("[ERROR]: If you want to subsample with a list, rabund or sabund file, you must provide the sample size.  You can do this by setting subsample=yourSampleSize.\n");  abort=true; }
280             }
281
282                 }
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "SummaryCommand", "SummaryCommand");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int SummaryCommand::execute(){
292         try {
293         
294                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
295                 
296                 if ((format != "sharedfile")) { inputFileNames.push_back(inputfile);  }
297                 else {  inputFileNames = parseSharedFile(sharedfile);  format = "rabund"; }
298                 
299                 if (m->control_pressed) { return 0; }
300                 
301                 int numLines = 0;
302                 int numCols = 0;
303                 map<string, string> groupIndex;
304         
305                 for (int p = 0; p < inputFileNames.size(); p++) {
306                         
307                         numLines = 0;
308                         numCols = 0;
309                         
310                         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + getOutputFileNameTag("summary");
311             string fileNameAve = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])) + "ave-std." + getOutputFileNameTag("summary");
312             outputNames.push_back(fileNameRoot); outputTypes["summary"].push_back(fileNameRoot);
313             
314                         if (inputFileNames.size() > 1) {
315                                 m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine();
316                 groupIndex[fileNameRoot] = groups[p];
317                         }
318                         
319                         sumCalculators.clear();
320                         
321                         ValidCalculators validCalculator;
322                         
323                         for (int i=0; i<Estimators.size(); i++) {
324                                 if (validCalculator.isValidCalculator("summary", Estimators[i]) == true) { 
325                                         if(Estimators[i] == "sobs"){
326                                                 sumCalculators.push_back(new Sobs());
327                                         }else if(Estimators[i] == "chao"){
328                                                 sumCalculators.push_back(new Chao1());
329                                         }else if(Estimators[i] == "coverage"){
330                                                 sumCalculators.push_back(new Coverage());
331                                         }else if(Estimators[i] == "geometric"){
332                                                 sumCalculators.push_back(new Geom());
333                                         }else if(Estimators[i] == "logseries"){
334                                                 sumCalculators.push_back(new LogSD());
335                                         }else if(Estimators[i] == "qstat"){
336                                                 sumCalculators.push_back(new QStat());
337                                         }else if(Estimators[i] == "bergerparker"){
338                                                 sumCalculators.push_back(new BergerParker());
339                                         }else if(Estimators[i] == "bstick"){
340                                                 sumCalculators.push_back(new BStick());
341                                         }else if(Estimators[i] == "ace"){
342                                                 if(abund < 5)
343                                                         abund = 10;
344                                                 sumCalculators.push_back(new Ace(abund));
345                                         }else if(Estimators[i] == "jack"){
346                                                 sumCalculators.push_back(new Jackknife());
347                                         }else if(Estimators[i] == "shannon"){
348                                                 sumCalculators.push_back(new Shannon());
349                                         }else if(Estimators[i] == "shannoneven"){
350                                                 sumCalculators.push_back(new ShannonEven());
351                                         }else if(Estimators[i] == "npshannon"){
352                                                 sumCalculators.push_back(new NPShannon());
353                                         }else if(Estimators[i] == "heip"){
354                                                 sumCalculators.push_back(new Heip());
355                                         }else if(Estimators[i] == "smithwilson"){
356                                                 sumCalculators.push_back(new SmithWilson());
357                                         }else if(Estimators[i] == "simpson"){
358                                                 sumCalculators.push_back(new Simpson());
359                                         }else if(Estimators[i] == "simpsoneven"){
360                                                 sumCalculators.push_back(new SimpsonEven());
361                                         }else if(Estimators[i] == "invsimpson"){
362                                                 sumCalculators.push_back(new InvSimpson());
363                                         }else if(Estimators[i] == "bootstrap"){
364                                                 sumCalculators.push_back(new Bootstrap());
365                                         }else if (Estimators[i] == "nseqs") { 
366                                                 sumCalculators.push_back(new NSeqs());
367                                         }else if (Estimators[i] == "goodscoverage") { 
368                                                 sumCalculators.push_back(new GoodsCoverage());
369                                         }else if (Estimators[i] == "efron") { 
370                                                 sumCalculators.push_back(new Efron(size));
371                                         }else if (Estimators[i] == "boneh") { 
372                                                 sumCalculators.push_back(new Boneh(size));
373                                         }else if (Estimators[i] == "solow") { 
374                                                 sumCalculators.push_back(new Solow(size));
375                                         }else if (Estimators[i] == "shen") { 
376                                                 sumCalculators.push_back(new Shen(size, abund));
377                                         }
378                                 }
379                         }
380                         
381                         //if the users entered no valid calculators don't execute command
382                         if (sumCalculators.size() == 0) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0; }
383                         
384                         ofstream outputFileHandle;
385                         m->openOutputFile(fileNameRoot, outputFileHandle);
386                         outputFileHandle << "label";
387             
388             ofstream outAve;
389             if (subsample) {
390                 m->openOutputFile(fileNameAve, outAve);
391                 outputNames.push_back(fileNameAve); outputTypes["summary"].push_back(fileNameAve);
392                 outAve << "label\tmethod"; 
393                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
394                 if (inputFileNames.size() > 1) {
395                     groupIndex[fileNameAve] = groups[p];
396                 }
397             }
398                 
399                         input = new InputData(inputFileNames[p], format);
400                         sabund = input->getSAbundVector();
401                         string lastLabel = sabund->getLabel();
402                 
403                         for(int i=0;i<sumCalculators.size();i++){
404                                 if(sumCalculators[i]->getCols() == 1){
405                                         outputFileHandle << '\t' << sumCalculators[i]->getName();
406                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName();  }
407                                         numCols++;
408                                 }
409                                 else{
410                                         outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
411                     if (subsample) { outAve << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
412                                         numCols += 3;
413                                 }
414                         }
415                         outputFileHandle << endl;
416             if (subsample) {  outAve << endl; }
417                         
418                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
419                         set<string> processedLabels;
420                         set<string> userLabels = labels;
421                         
422             
423             
424                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
425                         
426                         while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
427                                 
428                                 if (m->control_pressed) { outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
429                                 
430                                 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){                     
431                                         
432                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
433                                         processedLabels.insert(sabund->getLabel());
434                                         userLabels.erase(sabund->getLabel());
435                                         
436                     process(sabund, outputFileHandle, outAve);
437                     
438                     if (m->control_pressed) { outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
439                                         numLines++;
440                                 }
441                                 
442                                 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
443                                         string saveLabel = sabund->getLabel();
444                                         
445                                         delete sabund;
446                                         sabund = input->getSAbundVector(lastLabel);
447                                         
448                                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
449                                         processedLabels.insert(sabund->getLabel());
450                                         userLabels.erase(sabund->getLabel());
451                                         
452                     process(sabund, outputFileHandle, outAve);
453                     
454                     if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
455                                         numLines++;
456                                         
457                                         //restore real lastlabel to save below
458                                         sabund->setLabel(saveLabel);
459                                 }               
460                                 
461                                 lastLabel = sabund->getLabel();                 
462                                 
463                                 delete sabund;
464                                 sabund = input->getSAbundVector();
465                         }
466                         
467                         if (m->control_pressed) {  outputFileHandle.close(); outAve.close();  for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
468
469                         //output error messages about any remaining user labels
470                         set<string>::iterator it;
471                         bool needToRun = false;
472                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
473                                 m->mothurOut("Your file does not include the label " + *it); 
474                                 if (processedLabels.count(lastLabel) != 1) {
475                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
476                                         needToRun = true;
477                                 }else {
478                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
479                                 }
480                         }
481                         
482                         //run last label if you need to
483                         if (needToRun == true)  {
484                                 if (sabund != NULL) {   delete sabund;  }
485                                 sabund = input->getSAbundVector(lastLabel);
486                                 
487                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
488                 process(sabund, outputFileHandle, outAve);
489                 
490                 if (m->control_pressed) { outputFileHandle.close(); outAve.close(); for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }  delete sabund;  delete input;  return 0;  }
491                                 numLines++;
492                                 delete sabund;
493                         }
494                         
495                         outputFileHandle.close();
496             if (subsample) { outAve.close(); }
497                         
498                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }   delete input;  return 0;  }
499
500                         
501                         delete input;  
502                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
503                 }
504                 
505                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
506                 
507                 //create summary file containing all the groups data for each label - this function just combines the info from the files already created.
508                 if ((sharedfile != "") && (groupMode)) {   vector<string> comboNames = createGroupSummaryFile(numLines, numCols, outputNames, groupIndex);  for (int i = 0; i < comboNames.size(); i++) { outputNames.push_back(comboNames[i]); } }
509                 
510                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0;  }
511                 
512                 m->mothurOutEndLine();
513                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
514                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
515                 m->mothurOutEndLine();
516                 
517                 return 0;
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "SummaryCommand", "execute");
521                 exit(1);
522         }
523 }
524 //**********************************************************************************************************************
525 int SummaryCommand::process(SAbundVector*& sabund, ofstream& outputFileHandle, ofstream& outAve) {
526     try {
527         
528         //calculator -> data -> values
529         vector< vector< vector<double> > >  results; results.resize(sumCalculators.size());
530         
531         outputFileHandle << sabund->getLabel();
532         
533         SubSample sample;
534         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
535             
536             SAbundVector* thisIterSabund = sabund;
537             
538             //we want the summary results for the whole dataset, then the subsampling
539             if ((thisIter > 0) && subsample) { //subsample sabund and run it
540                 //copy sabund since getSample destroys it
541                 RAbundVector rabund = sabund->getRAbundVector();
542                 SAbundVector* newSabund = new SAbundVector();
543                 *newSabund = rabund.getSAbundVector();
544                 
545                 sample.getSample(newSabund, subsampleSize);
546                 thisIterSabund = newSabund;
547             }
548             
549             for(int i=0;i<sumCalculators.size();i++){
550                 vector<double> data = sumCalculators[i]->getValues(thisIterSabund);
551                
552                 if (m->control_pressed) {  return 0;  }
553                 
554                 if (thisIter == 0) {
555                     outputFileHandle << '\t';
556                     sumCalculators[i]->print(outputFileHandle);
557                 }else {
558                     //some of the calc have hci and lci need to make room for that
559                     if (results[i].size() == 0) {  results[i].resize(data.size());  }
560                     //save results for ave and std.
561                     for (int j = 0; j < data.size(); j++) {
562                         if (m->control_pressed) {  return 0;  }
563                         results[i][j].push_back(data[j]); 
564                     }
565                 }
566             }
567             
568             //cleanup memory
569             if ((thisIter > 0) && subsample) { delete thisIterSabund; }
570         }
571         outputFileHandle << endl;
572      
573         if (subsample) {
574             outAve << sabund->getLabel() << '\t' << "ave\t"; 
575             //find ave and std for this label and output
576             //will need to modify the createGroupSummary to combine results and not mess with the .summary file.
577             
578             //calcs -> values
579             vector< vector<double> >  calcAverages; calcAverages.resize(sumCalculators.size()); 
580             for (int i = 0; i < calcAverages.size(); i++) {  calcAverages[i].resize(results[i].size(), 0);  }
581             
582             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
583                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
584                     for (int j = 0; j < calcAverages[i].size(); j++) {
585                         calcAverages[i][j] += results[i][j][thisIter];
586                     }
587                 }
588             }
589             
590             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
591                 for (int j = 0; j < calcAverages[i].size(); j++) {
592                     calcAverages[i][j] /= (float) iters;
593                     outAve << calcAverages[i][j] << '\t';
594                 }
595             }
596             
597             //find standard deviation
598             vector< vector<double>  > stdDev; stdDev.resize(sumCalculators.size());
599             for (int i = 0; i < stdDev.size(); i++) {  stdDev[i].resize(results[i].size(), 0);  }
600             
601             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
602                 for (int i = 0; i < stdDev.size(); i++) {  
603                     for (int j = 0; j < stdDev[i].size(); j++) {
604                         stdDev[i][j] += ((results[i][j][thisIter] - calcAverages[i][j]) * (results[i][j][thisIter] - calcAverages[i][j]));
605                     }
606                 }
607             }
608             
609             outAve << endl << sabund->getLabel() << '\t' << "std\t"; 
610             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
611                 for (int j = 0; j < stdDev[i].size(); j++) {
612                     stdDev[i][j] /= (float) iters;
613                     stdDev[i][j] = sqrt(stdDev[i][j]);
614                     outAve << stdDev[i][j] << '\t';
615                 }
616             }
617             outAve << endl;  
618         }
619         
620         return 0;
621     }
622     catch(exception& e) {
623         m->errorOut(e, "SummaryCommand", "process");
624         exit(1);
625     }
626 }
627 //**********************************************************************************************************************
628 vector<string> SummaryCommand::parseSharedFile(string filename) {
629         try {
630                 vector<string> filenames;
631                 
632                 map<string, ofstream*> filehandles;
633                 map<string, ofstream*>::iterator it3;
634                 
635                 input = new InputData(filename, "sharedfile");
636                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
637                 
638                 string sharedFileRoot = m->getRootName(filename);
639                 
640         /******************************************************/
641         if (subsample) { 
642             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
643                 subsampleSize = lookup[0]->getNumSeqs();
644                 for (int i = 1; i < lookup.size(); i++) {
645                     int thisSize = lookup[i]->getNumSeqs();
646                     
647                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
648                 }
649             }else {
650                 m->clearGroups();
651                 vector<string> Groups;
652                 vector<SharedRAbundVector*> temp;
653                 for (int i = 0; i < lookup.size(); i++) {
654                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
655                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
656                         delete lookup[i];
657                     }else { 
658                         Groups.push_back(lookup[i]->getGroup()); 
659                         temp.push_back(lookup[i]);
660                     }
661                 } 
662                 lookup = temp;
663                 m->setGroups(Groups);
664             }
665             
666             if (lookup.size() < 1) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return filenames; }
667         }
668         
669                 
670                 /******************************************************/
671         
672         //clears file before we start to write to it below
673                 for (int i=0; i<lookup.size(); i++) {
674                         m->mothurRemove((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
675                         filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
676                 }
677         
678                 ofstream* temp;
679                 for (int i=0; i<lookup.size(); i++) {
680                         temp = new ofstream;
681                         filehandles[lookup[i]->getGroup()] = temp;
682                         groups.push_back(lookup[i]->getGroup());
683                 }
684
685                 while(lookup[0] != NULL) {
686                 
687                         for (int i = 0; i < lookup.size(); i++) {
688                                 RAbundVector rav = lookup[i]->getRAbundVector();
689                                 m->openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
690                                 rav.print(*(filehandles[lookup[i]->getGroup()]));
691                                 (*(filehandles[lookup[i]->getGroup()])).close();
692                         }
693                 
694                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
695                         lookup = input->getSharedRAbundVectors();
696                 }
697                 
698                 //free memory
699                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
700                         delete it3->second;
701                 }
702                 
703                 delete input;
704
705                 return filenames;
706         }
707         catch(exception& e) {
708                 m->errorOut(e, "SummaryCommand", "parseSharedFile");
709                 exit(1);
710         }
711 }
712 //**********************************************************************************************************************
713 vector<string> SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames, map<string, string> groupIndex) {
714         try {
715                                 
716                 //open each groups summary file
717         vector<string> newComboNames;
718                 
719                 map<string, map<string, vector<string> > > files;
720         map<string, string> filesTypesLabels;
721         map<string, int> filesTypesNumLines;
722                 for (int i=0; i<outputNames.size(); i++) {
723                         vector<string> thisFilesLines;
724             
725                         ifstream temp;
726                         m->openInputFile(outputNames[i], temp);
727                         
728                         //read through first line - labels
729             string labelsLine = m->getline(temp);
730             vector<string> theseLabels = m->splitWhiteSpace(labelsLine);
731             
732             string newLabel = "";
733             for (int j = 0; j < theseLabels.size(); j++) { 
734                  if (j == 1) {  newLabel += "group\t" + theseLabels[j] + '\t';
735                 }else{  newLabel += theseLabels[j] + '\t';      }
736             }
737                         
738                         m->gobble(temp);
739                         
740             int stop = numLines;
741             if (theseLabels.size() != numCols+1) {  stop = numLines*2; }
742                         //for each label
743                         for (int k = 0; k < stop; k++) {
744                                 
745                                 string thisLine = "";
746                                 string tempLabel;
747                                         
748                                 for (int j = 0; j < theseLabels.size(); j++) {  
749                                         temp >> tempLabel; 
750                                                 
751                                         //save for later
752                                         if (j == 1) { thisLine += groupIndex[outputNames[i]] + "\t" + tempLabel + "\t"; }
753                                         else{  thisLine += tempLabel + "\t";    }
754                                 }
755                                         
756                                 thisLine += "\n";
757                                 
758                                 thisFilesLines.push_back(thisLine);
759                                         
760                                 m->gobble(temp);
761                         }
762             
763             string extension = m->getExtension(outputNames[i]);
764             if (theseLabels.size() != numCols+1) { extension = ".ave-std" + extension;  }
765             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
766                         m->mothurRemove(combineFileName); //remove old file
767             filesTypesLabels[extension] = newLabel;
768             filesTypesNumLines[extension] = stop;
769             
770             map<string, map<string, vector<string> > >::iterator itFiles = files.find(extension);
771             if (itFiles != files.end()) { //add new files info to existing type
772                 files[extension][outputNames[i]] = thisFilesLines;
773             }else {
774                 map<string, vector<string> > thisFile;
775                 thisFile[outputNames[i]] = thisFilesLines;
776                 files[extension] = thisFile;
777             }
778                         
779                         temp.close();
780                         m->mothurRemove(outputNames[i]);
781                 }
782                 
783         
784         for (map<string, map<string, vector<string> > >::iterator itFiles = files.begin(); itFiles != files.end(); itFiles++) {
785             
786             if (m->control_pressed) { break; }
787             
788             string extension = itFiles->first;
789             map<string, vector<string> > thisType = itFiles->second;
790             string combineFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "groups" + extension;
791             newComboNames.push_back(combineFileName);
792             //open combined file
793             ofstream out;
794             m->openOutputFile(combineFileName, out);
795             
796             //output label line to new file
797             out <<  filesTypesLabels[extension] << endl;
798                 
799             //for each label
800             for (int k = 0; k < filesTypesNumLines[extension]; k++) {
801                 
802                 //grab summary data for each group
803                 for (map<string, vector<string> >::iterator itType = thisType.begin(); itType != thisType.end(); itType++) {
804                     out << (itType->second)[k];
805                 }
806             }   
807                 
808             outputNames.clear();
809                 
810             out.close();
811         }
812                 
813                 //return combine file name
814                 return newComboNames;
815                 
816         }
817         catch(exception& e) {
818                 m->errorOut(e, "SummaryCommand", "createGroupSummaryFile");
819                 exit(1);
820         }
821 }
822 //**********************************************************************************************************************