]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
188                         createPhylip = m->isTrue(temp);
189                         
190             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
191                         m->mothurConvert(temp, iters); 
192             
193             output = validParameter.validFile(parameters, "output", false);             
194             if(output == "not found"){  output = "lt"; }
195             else { createPhylip = true; }
196                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
197             
198             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
199                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
200             else {  
201                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
202                 else { subsample = false; }
203             }
204             
205             if (subsample == false) { iters = 0; }
206             
207                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
208                         m->setProcessors(temp);
209                         m->mothurConvert(temp, processors); 
210                         
211                         if (abort == false) {
212                         
213                                 ValidCalculators validCalculator;
214                                 int i;
215                                 
216                                 for (i=0; i<Estimators.size(); i++) {
217                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
218                                                 if (Estimators[i] == "sharedsobs") { 
219                                                         sumCalculators.push_back(new SharedSobsCS());
220                                                 }else if (Estimators[i] == "sharedchao") { 
221                                                         sumCalculators.push_back(new SharedChao1());
222                                                 }else if (Estimators[i] == "sharedace") { 
223                                                         sumCalculators.push_back(new SharedAce());
224                                                 }else if (Estimators[i] == "jabund") {  
225                                                         sumCalculators.push_back(new JAbund());
226                                                 }else if (Estimators[i] == "sorabund") { 
227                                                         sumCalculators.push_back(new SorAbund());
228                                                 }else if (Estimators[i] == "jclass") { 
229                                                         sumCalculators.push_back(new Jclass());
230                                                 }else if (Estimators[i] == "sorclass") { 
231                                                         sumCalculators.push_back(new SorClass());
232                                                 }else if (Estimators[i] == "jest") { 
233                                                         sumCalculators.push_back(new Jest());
234                                                 }else if (Estimators[i] == "sorest") { 
235                                                         sumCalculators.push_back(new SorEst());
236                                                 }else if (Estimators[i] == "thetayc") { 
237                                                         sumCalculators.push_back(new ThetaYC());
238                                                 }else if (Estimators[i] == "thetan") { 
239                                                         sumCalculators.push_back(new ThetaN());
240                                                 }else if (Estimators[i] == "kstest") { 
241                                                         sumCalculators.push_back(new KSTest());
242                                                 }else if (Estimators[i] == "sharednseqs") { 
243                                                         sumCalculators.push_back(new SharedNSeqs());
244                                                 }else if (Estimators[i] == "ochiai") { 
245                                                         sumCalculators.push_back(new Ochiai());
246                                                 }else if (Estimators[i] == "anderberg") { 
247                                                         sumCalculators.push_back(new Anderberg());
248                                                 }else if (Estimators[i] == "kulczynski") { 
249                                                         sumCalculators.push_back(new Kulczynski());
250                                                 }else if (Estimators[i] == "kulczynskicody") { 
251                                                         sumCalculators.push_back(new KulczynskiCody());
252                                                 }else if (Estimators[i] == "lennon") { 
253                                                         sumCalculators.push_back(new Lennon());
254                                                 }else if (Estimators[i] == "morisitahorn") { 
255                                                         sumCalculators.push_back(new MorHorn());
256                                                 }else if (Estimators[i] == "braycurtis") { 
257                                                         sumCalculators.push_back(new BrayCurtis());
258                                                 }else if (Estimators[i] == "whittaker") { 
259                                                         sumCalculators.push_back(new Whittaker());
260                                                 }else if (Estimators[i] == "odum") { 
261                                                         sumCalculators.push_back(new Odum());
262                                                 }else if (Estimators[i] == "canberra") { 
263                                                         sumCalculators.push_back(new Canberra());
264                                                 }else if (Estimators[i] == "structeuclidean") { 
265                                                         sumCalculators.push_back(new StructEuclidean());
266                                                 }else if (Estimators[i] == "structchord") { 
267                                                         sumCalculators.push_back(new StructChord());
268                                                 }else if (Estimators[i] == "hellinger") { 
269                                                         sumCalculators.push_back(new Hellinger());
270                                                 }else if (Estimators[i] == "manhattan") { 
271                                                         sumCalculators.push_back(new Manhattan());
272                                                 }else if (Estimators[i] == "structpearson") { 
273                                                         sumCalculators.push_back(new StructPearson());
274                                                 }else if (Estimators[i] == "soergel") { 
275                                                         sumCalculators.push_back(new Soergel());
276                                                 }else if (Estimators[i] == "spearman") { 
277                                                         sumCalculators.push_back(new Spearman());
278                                                 }else if (Estimators[i] == "structkulczynski") { 
279                                                         sumCalculators.push_back(new StructKulczynski());
280                                                 }else if (Estimators[i] == "speciesprofile") { 
281                                                         sumCalculators.push_back(new SpeciesProfile());
282                                                 }else if (Estimators[i] == "hamming") { 
283                                                         sumCalculators.push_back(new Hamming());
284                                                 }else if (Estimators[i] == "structchi2") { 
285                                                         sumCalculators.push_back(new StructChi2());
286                                                 }else if (Estimators[i] == "gower") { 
287                                                         sumCalculators.push_back(new Gower());
288                                                 }else if (Estimators[i] == "memchi2") { 
289                                                         sumCalculators.push_back(new MemChi2());
290                                                 }else if (Estimators[i] == "memchord") { 
291                                                         sumCalculators.push_back(new MemChord());
292                                                 }else if (Estimators[i] == "memeuclidean") { 
293                                                         sumCalculators.push_back(new MemEuclidean());
294                                                 }else if (Estimators[i] == "mempearson") { 
295                                                         sumCalculators.push_back(new MemPearson());
296                                                 }
297                                         }
298                                 }
299                                 
300                                 mult = false;
301                         }
302                 }
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
306                 exit(1);
307         }
308 }
309 //**********************************************************************************************************************
310
311 int SummarySharedCommand::execute(){
312         try {
313         
314                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
315                 
316                 ofstream outputFileHandle, outAll;
317         map<string, string> variables; 
318                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
319                 string outputFileName = getOutputFileName("summary",variables);
320                 
321                 //if the users entered no valid calculators don't execute command
322                 if (sumCalculators.size() == 0) { return 0; }
323                 //check if any calcs can do multiples
324                 else{
325                         if (all){ 
326                                 for (int i = 0; i < sumCalculators.size(); i++) {
327                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
328                                 }
329                         }
330                 }
331                         
332                 input = new InputData(sharedfile, "sharedfile");
333                 lookup = input->getSharedRAbundVectors();
334                 string lastLabel = lookup[0]->getLabel();
335         
336                 /******************************************************/
337                 //output headings for files
338                 /******************************************************/
339                 //output estimator names as column headers
340                 m->openOutputFile(outputFileName, outputFileHandle);
341                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
342                 for(int i=0;i<sumCalculators.size();i++){
343                         outputFileHandle << '\t' << sumCalculators[i]->getName();
344                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
345                 }
346                 outputFileHandle << endl;
347                 outputFileHandle.close();
348                 
349                 //create file and put column headers for multiple groups file
350         variables["[tag]"]= "multiple";
351                 string outAllFileName = getOutputFileName("summary",variables);
352                 if (mult == true) {
353                         m->openOutputFile(outAllFileName, outAll);
354                         outputNames.push_back(outAllFileName);
355                         
356                         outAll << "label" <<'\t' << "comparison" << '\t'; 
357                         for(int i=0;i<sumCalculators.size();i++){
358                                 if (sumCalculators[i]->getMultiple() == true) { 
359                                         outAll << '\t' << sumCalculators[i]->getName();
360                                 }
361                         }
362                         outAll << endl;
363                         outAll.close();
364                 }
365                 
366                 if (lookup.size() < 2) { 
367                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
368                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
369                         
370                         //close files and clean up
371                         m->mothurRemove(outputFileName);
372                         if (mult == true) { m->mothurRemove(outAllFileName);  }
373                         return 0;
374                 //if you only have 2 groups you don't need a .sharedmultiple file
375                 }else if ((lookup.size() == 2) && (mult == true)) { 
376                         mult = false;
377                         m->mothurRemove(outAllFileName);
378                         outputNames.pop_back();
379                 }
380                 
381                 if (m->control_pressed) {
382                         if (mult) {  m->mothurRemove(outAllFileName);  }
383                         m->mothurRemove(outputFileName); 
384                         delete input;
385                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
387                         m->clearGroups(); 
388                         return 0;
389                 }
390                 /******************************************************/
391         if (subsample) { 
392             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393                 subsampleSize = lookup[0]->getNumSeqs();
394                 for (int i = 1; i < lookup.size(); i++) {
395                     int thisSize = lookup[i]->getNumSeqs();
396                     
397                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
398                 }
399             }else {
400                 m->clearGroups();
401                 Groups.clear();
402                 vector<SharedRAbundVector*> temp;
403                 for (int i = 0; i < lookup.size(); i++) {
404                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
405                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
406                         delete lookup[i];
407                     }else { 
408                         Groups.push_back(lookup[i]->getGroup()); 
409                         temp.push_back(lookup[i]);
410                     }
411                 } 
412                 lookup = temp;
413                 m->setGroups(Groups);
414             }
415             
416             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
417         }
418
419                 
420                 /******************************************************/
421                 //comparison breakup to be used by different processes later
422                 numGroups = lookup.size();
423                 lines.resize(processors);
424                 for (int i = 0; i < processors; i++) {
425                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
427                 }               
428                 /******************************************************/
429                 
430                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431                 set<string> processedLabels;
432                 set<string> userLabels = labels;
433                         
434                 //as long as you are not at the end of the file or done wih the lines you want
435                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436                         if (m->control_pressed) {
437                                 if (mult) {  m->mothurRemove(outAllFileName);  }
438                                 m->mothurRemove(outputFileName); 
439                                 delete input; 
440                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
442                                 m->clearGroups(); 
443                                 return 0;
444                         }
445
446                 
447                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
448                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449                                 process(lookup, outputFileName, outAllFileName);
450                                 
451                                 processedLabels.insert(lookup[0]->getLabel());
452                                 userLabels.erase(lookup[0]->getLabel());
453                         }
454                         
455                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456                                         string saveLabel = lookup[0]->getLabel();
457                                         
458                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                                         lookup = input->getSharedRAbundVectors(lastLabel);
460
461                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462                                         process(lookup, outputFileName, outAllFileName);
463                                         
464                                         processedLabels.insert(lookup[0]->getLabel());
465                                         userLabels.erase(lookup[0]->getLabel());
466                                         
467                                         //restore real lastlabel to save below
468                                         lookup[0]->setLabel(saveLabel);
469                         }
470                         
471                         lastLabel = lookup[0]->getLabel();                      
472                                 
473                         //get next line to process
474                         //prevent memory leak
475                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
476                         lookup = input->getSharedRAbundVectors();
477                 }
478                 
479                 if (m->control_pressed) {
480                         if (mult) { m->mothurRemove(outAllFileName);  }
481                         m->mothurRemove(outputFileName); 
482                         delete input; 
483                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
484                         m->clearGroups(); 
485                         return 0;
486                 }
487
488                 //output error messages about any remaining user labels
489                 set<string>::iterator it;
490                 bool needToRun = false;
491                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
492                         m->mothurOut("Your file does not include the label " + *it); 
493                         if (processedLabels.count(lastLabel) != 1) {
494                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495                                 needToRun = true;
496                         }else {
497                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
498                         }
499                 }
500                 
501                 //run last label if you need to
502                 if (needToRun == true)  {
503                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
504                                 lookup = input->getSharedRAbundVectors(lastLabel);
505
506                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507                                 process(lookup, outputFileName, outAllFileName);
508                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
509                 }
510                 
511                                 
512                 //reset groups parameter
513                 m->clearGroups();  
514                 
515                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
516                 delete input;  
517                 
518                 if (m->control_pressed) {
519                         m->mothurRemove(outAllFileName);  
520                         m->mothurRemove(outputFileName); 
521                         return 0;
522                 }
523                 
524                 m->mothurOutEndLine();
525                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
527                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
528                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
529                 m->mothurOutEndLine();
530
531                 return 0;
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "SummarySharedCommand", "execute");
535                 exit(1);
536         }
537 }
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
540         try {
541                 
542                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543                 
544                 //output num seqs
545                 out << simMatrix.size() << endl;
546                 
547                 if (output == "lt") {
548                         for (int b = 0; b < simMatrix.size(); b++)      {
549                                 out << lookup[b]->getGroup() << '\t';
550                                 for (int n = 0; n < b; n++)     {
551                     if (m->control_pressed) { return 0; }
552                                         out << simMatrix[b][n] << '\t'; 
553                                 }
554                                 out << endl;
555                         }
556                 }else{
557                         for (int b = 0; b < simMatrix.size(); m++)      {
558                                 out << lookup[b]->getGroup() << '\t';
559                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
560                     if (m->control_pressed) { return 0; }
561                                         out << simMatrix[b][n] << '\t'; 
562                                 }
563                                 out << endl;
564                         }
565                 }
566         
567         return 0;
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "SummarySharedCommand", "printSims");
571                 exit(1);
572         }
573 }
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
576         try {
577         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
579         
580         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
581             
582             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
583             
584             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
585                 SubSample sample;
586                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
587                 
588                 //make copy of lookup so we don't get access violations
589                 vector<SharedRAbundVector*> newLookup;
590                 for (int k = 0; k < thisItersLookup.size(); k++) {
591                     SharedRAbundVector* temp = new SharedRAbundVector();
592                     temp->setLabel(thisItersLookup[k]->getLabel());
593                     temp->setGroup(thisItersLookup[k]->getGroup());
594                     newLookup.push_back(temp);
595                 }
596                 
597                 //for each bin
598                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
600                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
601                 }
602                 
603                 tempLabels = sample.getSample(newLookup, subsampleSize);
604                 thisItersLookup = newLookup;
605             }
606         
607             
608             if(processors == 1){
609                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610                 m->appendFiles((sumFileName + ".temp"), sumFileName);
611                 m->mothurRemove((sumFileName + ".temp"));
612                 if (mult) {
613                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614                     m->mothurRemove((sumAllFileName + ".temp"));
615                 }
616             }else{
617                 
618                 int process = 1;
619                 vector<int> processIDS;
620                 
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                 //loop through and create all the processes you want
623                 while (process != processors) {
624                     int pid = fork();
625                     
626                     if (pid > 0) {
627                         processIDS.push_back(pid); 
628                         process++;
629                     }else if (pid == 0){
630                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
631                         
632                         //only do this if you want a distance file
633                         if (createPhylip) {
634                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
635                             ofstream outtemp;
636                             m->openOutputFile(tempdistFileName, outtemp);
637                             
638                             for (int i = 0; i < calcDists.size(); i++) {
639                                 outtemp << calcDists[i].size() << endl;
640                                 
641                                 for (int j = 0; j < calcDists[i].size(); j++) {
642                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
643                                 }
644                             }
645                             outtemp.close();
646                         }
647                         
648                         exit(0);
649                     }else { 
650                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
651                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
652                         exit(0);
653                     }
654                 }
655                 
656                 //parent do your part
657                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
658                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
661                 
662                 //force parent to wait until all the processes are done
663                 for (int i = 0; i < processIDS.size(); i++) {
664                     int temp = processIDS[i];
665                     wait(&temp);
666                 }
667                 
668                 for (int i = 0; i < processIDS.size(); i++) {
669                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
672                     
673                     if (createPhylip) {
674                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
675                         ifstream intemp;
676                         m->openInputFile(tempdistFileName, intemp);
677                         
678                         for (int k = 0; k < calcDists.size(); k++) {
679                             int size = 0;
680                             intemp >> size; m->gobble(intemp);
681                             
682                             for (int j = 0; j < size; j++) {
683                                 int seq1 = 0;
684                                 int seq2 = 0;
685                                 float dist = 1.0;
686                                 
687                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
688                                 
689                                 seqDist tempDist(seq1, seq2, dist);
690                                 calcDists[k].push_back(tempDist);
691                             }
692                         }
693                         intemp.close();
694                         m->mothurRemove(tempdistFileName);
695                     }
696                 }
697 #else
698                 //////////////////////////////////////////////////////////////////////////////////////////////////////
699                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
700                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
701                 //Taking advantage of shared memory to pass results vectors.
702                 //////////////////////////////////////////////////////////////////////////////////////////////////////
703                 
704                 vector<summarySharedData*> pDataArray; 
705                 DWORD   dwThreadIdArray[processors-1];
706                 HANDLE  hThreadArray[processors-1]; 
707                 
708                 //Create processor worker threads.
709                 for( int i=1; i<processors; i++ ){
710                     
711                     //make copy of lookup so we don't get access violations
712                     vector<SharedRAbundVector*> newLookup;
713                     for (int k = 0; k < thisLookup.size(); k++) {
714                         SharedRAbundVector* temp = new SharedRAbundVector();
715                         temp->setLabel(thisLookup[k]->getLabel());
716                         temp->setGroup(thisLookup[k]->getGroup());
717                         newLookup.push_back(temp);
718                     }
719                     
720                     //for each bin
721                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
722                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
723                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
724                     }
725                     
726                     // Allocate memory for thread data.
727                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
728                     pDataArray.push_back(tempSum);
729                     processIDS.push_back(i);
730                     
731                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
732                 }
733                 
734                 //parent do your part
735                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
736                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
737                 m->mothurRemove((sumFileName + "0.temp"));
738                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
739                 
740                 //Wait until all threads have terminated.
741                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
742                 
743                 //Close all thread handles and free memory allocations.
744                 for(int i=0; i < pDataArray.size(); i++){
745                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
746                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
747                     
748                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
749                     
750                     if (createPhylip) {
751                         for (int k = 0; k < calcDists.size(); k++) {
752                             int size = pDataArray[i]->calcDists[k].size();
753                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
754                         }
755                     }
756                     
757                     CloseHandle(hThreadArray[i]);
758                     delete pDataArray[i];
759                 }
760                 
761 #endif
762             }
763             
764             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
765                 
766                 calcDistsTotals.push_back(calcDists); 
767                 //clean up memory
768                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
769                 thisItersLookup.clear();
770             }else {
771                 if (createPhylip) {
772                     for (int i = 0; i < calcDists.size(); i++) {
773                         if (m->control_pressed) { break; }
774                         
775                         //initialize matrix
776                         vector< vector<double> > matrix; //square matrix to represent the distance
777                         matrix.resize(thisLookup.size());
778                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
779                         
780                         for (int j = 0; j < calcDists[i].size(); j++) {
781                             int row = calcDists[i][j].seq1;
782                             int column = calcDists[i][j].seq2;
783                             double dist = calcDists[i][j].dist;
784                             
785                             matrix[row][column] = dist;
786                             matrix[column][row] = dist;
787                         }
788                         
789                         map<string, string> variables; 
790                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
791                         variables["[calc]"] = sumCalculators[i]->getName();
792                         variables["[distance]"] = thisLookup[0]->getLabel();
793                         variables["[outputtag]"] = output;
794                         variables["[tag2]"] = "";
795                         string distFileName = getOutputFileName("phylip",variables);
796                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
797                         ofstream outDist;
798                         m->openOutputFile(distFileName, outDist);
799                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
800                         
801                         printSims(outDist, matrix);
802                         
803                         outDist.close();
804                     }
805                 }
806             }
807             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
808                 }
809
810         if (iters != 0) {
811             //we need to find the average distance and standard deviation for each groups distance
812             
813             vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
814             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
815                 calcAverages[i].resize(calcDistsTotals[0][i].size());
816                 
817                 for (int j = 0; j < calcAverages[i].size(); j++) {
818                     calcAverages[i][j].seq1 = calcDists[i][j].seq1;
819                     calcAverages[i][j].seq2 = calcDists[i][j].seq2;
820                     calcAverages[i][j].dist = 0.0;
821                 }
822             }
823             
824             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
825                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
826                     for (int j = 0; j < calcAverages[i].size(); j++) {
827                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
828                     }
829                 }
830             }
831             
832             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
833                 for (int j = 0; j < calcAverages[i].size(); j++) {
834                     calcAverages[i][j].dist /= (float) iters;
835                 }
836             }
837             
838             //find standard deviation
839             vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
840             for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
841                 stdDev[i].resize(calcDistsTotals[0][i].size());
842                 
843                 for (int j = 0; j < stdDev[i].size(); j++) {
844                     stdDev[i][j].seq1 = calcDists[i][j].seq1;
845                     stdDev[i][j].seq2 = calcDists[i][j].seq2;
846                     stdDev[i][j].dist = 0.0;
847                 }
848             }
849             
850             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
851                 for (int i = 0; i < stdDev.size(); i++) {  
852                     for (int j = 0; j < stdDev[i].size(); j++) {
853                         stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
854                     }
855                 }
856             }
857             
858             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
859                 for (int j = 0; j < stdDev[i].size(); j++) {
860                     stdDev[i][j].dist /= (float) iters;
861                     stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
862                 }
863             }
864             
865             //print results
866             for (int i = 0; i < calcDists.size(); i++) {
867                 vector< vector<double> > matrix; //square matrix to represent the distance
868                 matrix.resize(thisLookup.size());
869                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
870                 
871                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
872                 stdmatrix.resize(thisLookup.size());
873                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
874                 
875                 
876                 for (int j = 0; j < calcAverages[i].size(); j++) {
877                     int row = calcAverages[i][j].seq1;
878                     int column = calcAverages[i][j].seq2;
879                     float dist = calcAverages[i][j].dist;
880                     float stdDist = stdDev[i][j].dist;
881                     
882                     matrix[row][column] = dist;
883                     matrix[column][row] = dist;
884                     stdmatrix[row][column] = stdDist;
885                     stdmatrix[column][row] = stdDist;
886                 }
887                 
888                 map<string, string> variables; 
889                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
890                 variables["[calc]"] = sumCalculators[i]->getName();
891                 variables["[distance]"] = thisLookup[0]->getLabel();
892                 variables["[outputtag]"] = output;
893                 variables["[tag2]"] = "ave";
894                 string distFileName = getOutputFileName("phylip",variables);
895                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
896                 ofstream outAve;
897                 m->openOutputFile(distFileName, outAve);
898                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
899                 
900                 printSims(outAve, matrix);
901                 
902                 outAve.close();
903                 
904                 variables["[tag2]"] = "std";
905                 distFileName = getOutputFileName("phylip",variables);
906                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
907                 ofstream outSTD;
908                 m->openOutputFile(distFileName, outSTD);
909                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
910                 
911                 printSims(outSTD, stdmatrix);
912                 
913                 outSTD.close();
914                 
915             }
916         }
917         
918         return 0;
919         }
920         catch(exception& e) {
921                 m->errorOut(e, "SummarySharedCommand", "process");
922                 exit(1);
923         }
924 }
925 /**************************************************************************************************/
926 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
927         try {
928                 
929                 //loop through calculators and add to file all for all calcs that can do mutiple groups
930                 if (mult == true) {
931                         ofstream outAll;
932                         m->openOutputFile(sumAllFile, outAll);
933                         
934                         //output label
935                         outAll << thisLookup[0]->getLabel() << '\t';
936                         
937                         //output groups names
938                         string outNames = "";
939                         for (int j = 0; j < thisLookup.size(); j++) {
940                                 outNames += thisLookup[j]->getGroup() +  "-";
941                         }
942                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
943                         outAll << outNames << '\t';
944                         
945                         for(int i=0;i<sumCalculators.size();i++){
946                                 if (sumCalculators[i]->getMultiple() == true) { 
947                                         sumCalculators[i]->getValues(thisLookup);
948                                         
949                                         if (m->control_pressed) { outAll.close(); return 1; }
950                                         
951                                         outAll << '\t';
952                                         sumCalculators[i]->print(outAll);
953                                 }
954                         }
955                         outAll << endl;
956                         outAll.close();
957                 }
958                 
959                 ofstream outputFileHandle;
960                 m->openOutputFile(sumFile, outputFileHandle);
961                 
962                 vector<SharedRAbundVector*> subset;
963                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
964
965                         for (int l = 0; l < k; l++) {
966                                 
967                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
968                                 
969                                 subset.clear(); //clear out old pair of sharedrabunds
970                                 //add new pair of sharedrabunds
971                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
972                                 
973                                 //sort groups to be alphanumeric
974                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
975                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
976                                 }else{
977                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
978                                 }
979                                 
980                                 for(int i=0;i<sumCalculators.size();i++) {
981                                         
982                                         //if this calc needs all groups to calculate the pair load all groups
983                                         if (sumCalculators[i]->getNeedsAll()) { 
984                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
985                                                 for (int w = 0; w < thisLookup.size(); w++) {
986                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
987                                                 }
988                                         }
989                                         
990                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
991                                         
992                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
993                                         
994                                         outputFileHandle << '\t';
995                                         sumCalculators[i]->print(outputFileHandle);
996                                         
997                                         seqDist temp(l, k, tempdata[0]);
998                                         calcDists[i].push_back(temp);
999                                 }
1000                                 outputFileHandle << endl;
1001                         }
1002                 }
1003                 
1004                 outputFileHandle.close();
1005                 
1006                 return 0;
1007         }
1008         catch(exception& e) {
1009                 m->errorOut(e, "SummarySharedCommand", "driver");
1010                 exit(1);
1011         }
1012 }
1013 /**************************************************************************************************/
1014
1015