]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
fixed bug with dist.shared subsampling. added mode parameter to dist.shared so...
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 SummarySharedCommand::SummarySharedCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["summary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80
81 SummarySharedCommand::SummarySharedCommand(string option)  {
82         try {
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                                 
86                 //allow user to run help
87                 if(option == "help") {  help(); abort = true; calledHelp = true; }
88                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89                 
90                 else {
91                         vector<string> myArray = setParameters();
92                         
93                         OptionParser parser(option);
94                         map<string, string> parameters = parser.getParameters();
95                         map<string, string>::iterator it;
96                         
97                         ValidParameters validParameter;
98                 
99                         //check to make sure all parameters are valid for command
100                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["summary"] = tempOutNames;
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("shared");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
119                                 }
120                         }
121                         
122                         //get shared file
123                         sharedfile = validParameter.validFile(parameters, "shared", true);
124                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
125                         else if (sharedfile == "not found") { 
126                                 //if there is a current shared file, use it
127                                 sharedfile = m->getSharedFile(); 
128                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
129                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
130                         }else { m->setSharedFile(sharedfile); }
131                         
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
135                         
136
137                         //check for optional parameter and set defaults
138                         // ...at some point should added some additional type checking...
139                         label = validParameter.validFile(parameters, "label", false);                   
140                         if (label == "not found") { label = ""; }
141                         else { 
142                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
143                                 else { allLines = 1;  }
144                         }
145                         
146                                         
147                         calc = validParameter.validFile(parameters, "calc", false);                     
148                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
149                         else { 
150                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
151                         }
152                         m->splitAtDash(calc, Estimators);
153                         if (m->inUsersGroups("citation", Estimators)) { 
154                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
155                                 //remove citation from list of calcs
156                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
157                         }
158                         
159                         groups = validParameter.validFile(parameters, "groups", false);                 
160                         if (groups == "not found") { groups = ""; }
161                         else { 
162                                 m->splitAtDash(groups, Groups);
163                                 m->setGroups(Groups);
164                         }
165                         
166                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
167                         all = m->isTrue(temp);
168                         
169                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
170                         createPhylip = m->isTrue(temp);
171                         
172             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
173                         m->mothurConvert(temp, iters); 
174             
175             output = validParameter.validFile(parameters, "output", false);             if(output == "not found"){      output = "lt"; }
176                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
177             
178             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
179                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
180             else {  
181                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
182                 else { subsample = false; }
183             }
184             
185             if (subsample == false) { iters = 1; }
186             
187                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
188                         m->setProcessors(temp);
189                         m->mothurConvert(temp, processors); 
190                         
191                         if (abort == false) {
192                         
193                                 ValidCalculators validCalculator;
194                                 int i;
195                                 
196                                 for (i=0; i<Estimators.size(); i++) {
197                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
198                                                 if (Estimators[i] == "sharedsobs") { 
199                                                         sumCalculators.push_back(new SharedSobsCS());
200                                                 }else if (Estimators[i] == "sharedchao") { 
201                                                         sumCalculators.push_back(new SharedChao1());
202                                                 }else if (Estimators[i] == "sharedace") { 
203                                                         sumCalculators.push_back(new SharedAce());
204                                                 }else if (Estimators[i] == "jabund") {  
205                                                         sumCalculators.push_back(new JAbund());
206                                                 }else if (Estimators[i] == "sorabund") { 
207                                                         sumCalculators.push_back(new SorAbund());
208                                                 }else if (Estimators[i] == "jclass") { 
209                                                         sumCalculators.push_back(new Jclass());
210                                                 }else if (Estimators[i] == "sorclass") { 
211                                                         sumCalculators.push_back(new SorClass());
212                                                 }else if (Estimators[i] == "jest") { 
213                                                         sumCalculators.push_back(new Jest());
214                                                 }else if (Estimators[i] == "sorest") { 
215                                                         sumCalculators.push_back(new SorEst());
216                                                 }else if (Estimators[i] == "thetayc") { 
217                                                         sumCalculators.push_back(new ThetaYC());
218                                                 }else if (Estimators[i] == "thetan") { 
219                                                         sumCalculators.push_back(new ThetaN());
220                                                 }else if (Estimators[i] == "kstest") { 
221                                                         sumCalculators.push_back(new KSTest());
222                                                 }else if (Estimators[i] == "sharednseqs") { 
223                                                         sumCalculators.push_back(new SharedNSeqs());
224                                                 }else if (Estimators[i] == "ochiai") { 
225                                                         sumCalculators.push_back(new Ochiai());
226                                                 }else if (Estimators[i] == "anderberg") { 
227                                                         sumCalculators.push_back(new Anderberg());
228                                                 }else if (Estimators[i] == "kulczynski") { 
229                                                         sumCalculators.push_back(new Kulczynski());
230                                                 }else if (Estimators[i] == "kulczynskicody") { 
231                                                         sumCalculators.push_back(new KulczynskiCody());
232                                                 }else if (Estimators[i] == "lennon") { 
233                                                         sumCalculators.push_back(new Lennon());
234                                                 }else if (Estimators[i] == "morisitahorn") { 
235                                                         sumCalculators.push_back(new MorHorn());
236                                                 }else if (Estimators[i] == "braycurtis") { 
237                                                         sumCalculators.push_back(new BrayCurtis());
238                                                 }else if (Estimators[i] == "whittaker") { 
239                                                         sumCalculators.push_back(new Whittaker());
240                                                 }else if (Estimators[i] == "odum") { 
241                                                         sumCalculators.push_back(new Odum());
242                                                 }else if (Estimators[i] == "canberra") { 
243                                                         sumCalculators.push_back(new Canberra());
244                                                 }else if (Estimators[i] == "structeuclidean") { 
245                                                         sumCalculators.push_back(new StructEuclidean());
246                                                 }else if (Estimators[i] == "structchord") { 
247                                                         sumCalculators.push_back(new StructChord());
248                                                 }else if (Estimators[i] == "hellinger") { 
249                                                         sumCalculators.push_back(new Hellinger());
250                                                 }else if (Estimators[i] == "manhattan") { 
251                                                         sumCalculators.push_back(new Manhattan());
252                                                 }else if (Estimators[i] == "structpearson") { 
253                                                         sumCalculators.push_back(new StructPearson());
254                                                 }else if (Estimators[i] == "soergel") { 
255                                                         sumCalculators.push_back(new Soergel());
256                                                 }else if (Estimators[i] == "spearman") { 
257                                                         sumCalculators.push_back(new Spearman());
258                                                 }else if (Estimators[i] == "structkulczynski") { 
259                                                         sumCalculators.push_back(new StructKulczynski());
260                                                 }else if (Estimators[i] == "speciesprofile") { 
261                                                         sumCalculators.push_back(new SpeciesProfile());
262                                                 }else if (Estimators[i] == "hamming") { 
263                                                         sumCalculators.push_back(new Hamming());
264                                                 }else if (Estimators[i] == "structchi2") { 
265                                                         sumCalculators.push_back(new StructChi2());
266                                                 }else if (Estimators[i] == "gower") { 
267                                                         sumCalculators.push_back(new Gower());
268                                                 }else if (Estimators[i] == "memchi2") { 
269                                                         sumCalculators.push_back(new MemChi2());
270                                                 }else if (Estimators[i] == "memchord") { 
271                                                         sumCalculators.push_back(new MemChord());
272                                                 }else if (Estimators[i] == "memeuclidean") { 
273                                                         sumCalculators.push_back(new MemEuclidean());
274                                                 }else if (Estimators[i] == "mempearson") { 
275                                                         sumCalculators.push_back(new MemPearson());
276                                                 }
277                                         }
278                                 }
279                                 
280                                 mult = false;
281                         }
282                 }
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int SummarySharedCommand::execute(){
292         try {
293         
294                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
295                 
296                 ofstream outputFileHandle, outAll;
297                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
298                 
299                 //if the users entered no valid calculators don't execute command
300                 if (sumCalculators.size() == 0) { return 0; }
301                 //check if any calcs can do multiples
302                 else{
303                         if (all){ 
304                                 for (int i = 0; i < sumCalculators.size(); i++) {
305                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
306                                 }
307                         }
308                 }
309                         
310                 input = new InputData(sharedfile, "sharedfile");
311                 lookup = input->getSharedRAbundVectors();
312                 string lastLabel = lookup[0]->getLabel();
313         
314                 /******************************************************/
315                 //output headings for files
316                 /******************************************************/
317                 //output estimator names as column headers
318                 m->openOutputFile(outputFileName, outputFileHandle);
319                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
320                 for(int i=0;i<sumCalculators.size();i++){
321                         outputFileHandle << '\t' << sumCalculators[i]->getName();
322                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
323                 }
324                 outputFileHandle << endl;
325                 outputFileHandle.close();
326                 
327                 //create file and put column headers for multiple groups file
328                 string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
329                 if (mult == true) {
330                         m->openOutputFile(outAllFileName, outAll);
331                         outputNames.push_back(outAllFileName);
332                         
333                         outAll << "label" <<'\t' << "comparison" << '\t'; 
334                         for(int i=0;i<sumCalculators.size();i++){
335                                 if (sumCalculators[i]->getMultiple() == true) { 
336                                         outAll << '\t' << sumCalculators[i]->getName();
337                                 }
338                         }
339                         outAll << endl;
340                         outAll.close();
341                 }
342                 
343                 if (lookup.size() < 2) { 
344                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
345                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
346                         
347                         //close files and clean up
348                         m->mothurRemove(outputFileName);
349                         if (mult == true) { m->mothurRemove(outAllFileName);  }
350                         return 0;
351                 //if you only have 2 groups you don't need a .sharedmultiple file
352                 }else if ((lookup.size() == 2) && (mult == true)) { 
353                         mult = false;
354                         m->mothurRemove(outAllFileName);
355                         outputNames.pop_back();
356                 }
357                 
358                 if (m->control_pressed) {
359                         if (mult) {  m->mothurRemove(outAllFileName);  }
360                         m->mothurRemove(outputFileName); 
361                         delete input;
362                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
363                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
364                         m->clearGroups(); 
365                         return 0;
366                 }
367                 /******************************************************/
368         if (subsample) { 
369             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
370                 subsampleSize = lookup[0]->getNumSeqs();
371                 for (int i = 1; i < lookup.size(); i++) {
372                     int thisSize = lookup[i]->getNumSeqs();
373                     
374                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
375                 }
376             }else {
377                 m->clearGroups();
378                 Groups.clear();
379                 vector<SharedRAbundVector*> temp;
380                 for (int i = 0; i < lookup.size(); i++) {
381                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
382                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
383                         delete lookup[i];
384                     }else { 
385                         Groups.push_back(lookup[i]->getGroup()); 
386                         temp.push_back(lookup[i]);
387                     }
388                 } 
389                 lookup = temp;
390                 m->setGroups(Groups);
391             }
392             
393             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
394         }
395
396                 
397                 /******************************************************/
398                 //comparison breakup to be used by different processes later
399                 numGroups = lookup.size();
400                 lines.resize(processors);
401                 for (int i = 0; i < processors; i++) {
402                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
403                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
404                 }               
405                 /******************************************************/
406                 
407                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
408                 set<string> processedLabels;
409                 set<string> userLabels = labels;
410                         
411                 //as long as you are not at the end of the file or done wih the lines you want
412                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
413                         if (m->control_pressed) {
414                                 if (mult) {  m->mothurRemove(outAllFileName);  }
415                                 m->mothurRemove(outputFileName); 
416                                 delete input; 
417                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
418                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
419                                 m->clearGroups(); 
420                                 return 0;
421                         }
422
423                 
424                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
425                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
426                                 process(lookup, outputFileName, outAllFileName);
427                                 
428                                 processedLabels.insert(lookup[0]->getLabel());
429                                 userLabels.erase(lookup[0]->getLabel());
430                         }
431                         
432                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
433                                         string saveLabel = lookup[0]->getLabel();
434                                         
435                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
436                                         lookup = input->getSharedRAbundVectors(lastLabel);
437
438                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
439                                         process(lookup, outputFileName, outAllFileName);
440                                         
441                                         processedLabels.insert(lookup[0]->getLabel());
442                                         userLabels.erase(lookup[0]->getLabel());
443                                         
444                                         //restore real lastlabel to save below
445                                         lookup[0]->setLabel(saveLabel);
446                         }
447                         
448                         lastLabel = lookup[0]->getLabel();                      
449                                 
450                         //get next line to process
451                         //prevent memory leak
452                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
453                         lookup = input->getSharedRAbundVectors();
454                 }
455                 
456                 if (m->control_pressed) {
457                         if (mult) { m->mothurRemove(outAllFileName);  }
458                         m->mothurRemove(outputFileName); 
459                         delete input; 
460                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
461                         m->clearGroups(); 
462                         return 0;
463                 }
464
465                 //output error messages about any remaining user labels
466                 set<string>::iterator it;
467                 bool needToRun = false;
468                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
469                         m->mothurOut("Your file does not include the label " + *it); 
470                         if (processedLabels.count(lastLabel) != 1) {
471                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
472                                 needToRun = true;
473                         }else {
474                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
475                         }
476                 }
477                 
478                 //run last label if you need to
479                 if (needToRun == true)  {
480                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
481                                 lookup = input->getSharedRAbundVectors(lastLabel);
482
483                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
484                                 process(lookup, outputFileName, outAllFileName);
485                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
486                 }
487                 
488                                 
489                 //reset groups parameter
490                 m->clearGroups();  
491                 
492                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
493                 delete input;  
494                 
495                 if (m->control_pressed) {
496                         m->mothurRemove(outAllFileName);  
497                         m->mothurRemove(outputFileName); 
498                         return 0;
499                 }
500                 
501                 m->mothurOutEndLine();
502                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
503                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
504                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
505                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
506                 m->mothurOutEndLine();
507
508                 return 0;
509         }
510         catch(exception& e) {
511                 m->errorOut(e, "SummarySharedCommand", "execute");
512                 exit(1);
513         }
514 }
515 /***********************************************************/
516 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
517         try {
518                 
519                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
520                 
521                 //output num seqs
522                 out << simMatrix.size() << endl;
523                 
524                 if (output == "lt") {
525                         for (int b = 0; b < simMatrix.size(); b++)      {
526                                 out << lookup[b]->getGroup() << '\t';
527                                 for (int n = 0; n < b; n++)     {
528                     if (m->control_pressed) { return 0; }
529                                         out << simMatrix[b][n] << '\t'; 
530                                 }
531                                 out << endl;
532                         }
533                 }else{
534                         for (int b = 0; b < simMatrix.size(); m++)      {
535                                 out << lookup[b]->getGroup() << '\t';
536                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
537                     if (m->control_pressed) { return 0; }
538                                         out << simMatrix[b][n] << '\t'; 
539                                 }
540                                 out << endl;
541                         }
542                 }
543         
544         return 0;
545         }
546         catch(exception& e) {
547                 m->errorOut(e, "SummarySharedCommand", "printSims");
548                 exit(1);
549         }
550 }
551 /***********************************************************/
552 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
553         try {
554         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
555         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
556         
557         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
558             
559             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
560             
561             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
562                 SubSample sample;
563                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
564                 
565                 //make copy of lookup so we don't get access violations
566                 vector<SharedRAbundVector*> newLookup;
567                 for (int k = 0; k < thisItersLookup.size(); k++) {
568                     SharedRAbundVector* temp = new SharedRAbundVector();
569                     temp->setLabel(thisItersLookup[k]->getLabel());
570                     temp->setGroup(thisItersLookup[k]->getGroup());
571                     newLookup.push_back(temp);
572                 }
573                 
574                 //for each bin
575                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
576                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
577                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
578                 }
579                 
580                 tempLabels = sample.getSample(newLookup, subsampleSize);
581                 thisItersLookup = newLookup;
582             }
583         
584             
585             if(processors == 1){
586                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
587                 m->appendFiles((sumFileName + ".temp"), sumFileName);
588                 m->mothurRemove((sumFileName + ".temp"));
589                 if (mult) {
590                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
591                     m->mothurRemove((sumAllFileName + ".temp"));
592                 }
593             }else{
594                 
595                 int process = 1;
596                 vector<int> processIDS;
597                 
598 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
599                 //loop through and create all the processes you want
600                 while (process != processors) {
601                     int pid = fork();
602                     
603                     if (pid > 0) {
604                         processIDS.push_back(pid); 
605                         process++;
606                     }else if (pid == 0){
607                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
608                         
609                         //only do this if you want a distance file
610                         if (createPhylip) {
611                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
612                             ofstream outtemp;
613                             m->openOutputFile(tempdistFileName, outtemp);
614                             
615                             for (int i = 0; i < calcDists.size(); i++) {
616                                 outtemp << calcDists[i].size() << endl;
617                                 
618                                 for (int j = 0; j < calcDists[i].size(); j++) {
619                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
620                                 }
621                             }
622                             outtemp.close();
623                         }
624                         
625                         exit(0);
626                     }else { 
627                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
628                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
629                         exit(0);
630                     }
631                 }
632                 
633                 //parent do your part
634                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
635                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
636                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
637                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
638                 
639                 //force parent to wait until all the processes are done
640                 for (int i = 0; i < processIDS.size(); i++) {
641                     int temp = processIDS[i];
642                     wait(&temp);
643                 }
644                 
645                 for (int i = 0; i < processIDS.size(); i++) {
646                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
647                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
648                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
649                     
650                     if (createPhylip) {
651                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
652                         ifstream intemp;
653                         m->openInputFile(tempdistFileName, intemp);
654                         
655                         for (int k = 0; k < calcDists.size(); k++) {
656                             int size = 0;
657                             intemp >> size; m->gobble(intemp);
658                             
659                             for (int j = 0; j < size; j++) {
660                                 int seq1 = 0;
661                                 int seq2 = 0;
662                                 float dist = 1.0;
663                                 
664                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
665                                 
666                                 seqDist tempDist(seq1, seq2, dist);
667                                 calcDists[k].push_back(tempDist);
668                             }
669                         }
670                         intemp.close();
671                         m->mothurRemove(tempdistFileName);
672                     }
673                 }
674 #else
675                 //////////////////////////////////////////////////////////////////////////////////////////////////////
676                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
677                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
678                 //Taking advantage of shared memory to pass results vectors.
679                 //////////////////////////////////////////////////////////////////////////////////////////////////////
680                 
681                 vector<summarySharedData*> pDataArray; 
682                 DWORD   dwThreadIdArray[processors-1];
683                 HANDLE  hThreadArray[processors-1]; 
684                 
685                 //Create processor worker threads.
686                 for( int i=1; i<processors; i++ ){
687                     
688                     //make copy of lookup so we don't get access violations
689                     vector<SharedRAbundVector*> newLookup;
690                     for (int k = 0; k < thisLookup.size(); k++) {
691                         SharedRAbundVector* temp = new SharedRAbundVector();
692                         temp->setLabel(thisLookup[k]->getLabel());
693                         temp->setGroup(thisLookup[k]->getGroup());
694                         newLookup.push_back(temp);
695                     }
696                     
697                     //for each bin
698                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
699                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
700                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
701                     }
702                     
703                     // Allocate memory for thread data.
704                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
705                     pDataArray.push_back(tempSum);
706                     processIDS.push_back(i);
707                     
708                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
709                 }
710                 
711                 //parent do your part
712                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
713                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
714                 m->mothurRemove((sumFileName + "0.temp"));
715                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
716                 
717                 //Wait until all threads have terminated.
718                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
719                 
720                 //Close all thread handles and free memory allocations.
721                 for(int i=0; i < pDataArray.size(); i++){
722                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
723                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
724                     
725                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
726                     
727                     if (createPhylip) {
728                         for (int k = 0; k < calcDists.size(); k++) {
729                             int size = pDataArray[i]->calcDists[k].size();
730                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
731                         }
732                     }
733                     
734                     CloseHandle(hThreadArray[i]);
735                     delete pDataArray[i];
736                 }
737                 
738 #endif
739             }
740             
741             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
742                 
743                 calcDistsTotals.push_back(calcDists); 
744                 //clean up memory
745                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
746                 thisItersLookup.clear();
747             }else {
748                 if (createPhylip) {
749                     for (int i = 0; i < calcDists.size(); i++) {
750                         if (m->control_pressed) { break; }
751                         
752                         //initialize matrix
753                         vector< vector<double> > matrix; //square matrix to represent the distance
754                         matrix.resize(thisLookup.size());
755                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
756                         
757                         for (int j = 0; j < calcDists[i].size(); j++) {
758                             int row = calcDists[i][j].seq1;
759                             int column = calcDists[i][j].seq2;
760                             double dist = calcDists[i][j].dist;
761                             
762                             matrix[row][column] = dist;
763                             matrix[column][row] = dist;
764                         }
765                         
766                         string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".dist";
767                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
768                         ofstream outDist;
769                         m->openOutputFile(distFileName, outDist);
770                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
771                         
772                         printSims(outDist, matrix);
773                         
774                         outDist.close();
775                     }
776                 }
777             }
778             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
779                 }
780
781         if (iters != 1) {
782             //we need to find the average distance and standard deviation for each groups distance
783             
784             vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
785             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
786                 calcAverages[i].resize(calcDistsTotals[0][i].size());
787                 
788                 for (int j = 0; j < calcAverages[i].size(); j++) {
789                     calcAverages[i][j].seq1 = calcDists[i][j].seq1;
790                     calcAverages[i][j].seq2 = calcDists[i][j].seq2;
791                     calcAverages[i][j].dist = 0.0;
792                 }
793             }
794             
795             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
796                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
797                     for (int j = 0; j < calcAverages[i].size(); j++) {
798                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
799                     }
800                 }
801             }
802             
803             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
804                 for (int j = 0; j < calcAverages[i].size(); j++) {
805                     calcAverages[i][j].dist /= (float) iters;
806                 }
807             }
808             
809             //find standard deviation
810             vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
811             for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
812                 stdDev[i].resize(calcDistsTotals[0][i].size());
813                 
814                 for (int j = 0; j < stdDev[i].size(); j++) {
815                     stdDev[i][j].seq1 = calcDists[i][j].seq1;
816                     stdDev[i][j].seq2 = calcDists[i][j].seq2;
817                     stdDev[i][j].dist = 0.0;
818                 }
819             }
820             
821             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
822                 for (int i = 0; i < stdDev.size(); i++) {  
823                     for (int j = 0; j < stdDev[i].size(); j++) {
824                         stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
825                     }
826                 }
827             }
828             
829             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
830                 for (int j = 0; j < stdDev[i].size(); j++) {
831                     stdDev[i][j].dist /= (float) iters;
832                     stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
833                 }
834             }
835             
836             //print results
837             for (int i = 0; i < calcDists.size(); i++) {
838                 vector< vector<double> > matrix; //square matrix to represent the distance
839                 matrix.resize(thisLookup.size());
840                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
841                 
842                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
843                 stdmatrix.resize(thisLookup.size());
844                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
845                 
846                 
847                 for (int j = 0; j < calcAverages[i].size(); j++) {
848                     int row = calcAverages[i][j].seq1;
849                     int column = calcAverages[i][j].seq2;
850                     float dist = calcAverages[i][j].dist;
851                     float stdDist = stdDev[i][j].dist;
852                     
853                     matrix[row][column] = dist;
854                     matrix[column][row] = dist;
855                     stdmatrix[row][column] = stdDist;
856                     stdmatrix[column][row] = stdDist;
857                 }
858                 
859                 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".ave.dist";
860                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
861                 ofstream outAve;
862                 m->openOutputFile(distFileName, outAve);
863                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
864                 
865                 printSims(outAve, matrix);
866                 
867                 outAve.close();
868                 
869                 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".std.dist";
870                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
871                 ofstream outSTD;
872                 m->openOutputFile(distFileName, outSTD);
873                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
874                 
875                 printSims(outSTD, stdmatrix);
876                 
877                 outSTD.close();
878                 
879             }
880         }
881         
882         return 0;
883         }
884         catch(exception& e) {
885                 m->errorOut(e, "SummarySharedCommand", "process");
886                 exit(1);
887         }
888 }
889 /**************************************************************************************************/
890 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
891         try {
892                 
893                 //loop through calculators and add to file all for all calcs that can do mutiple groups
894                 if (mult == true) {
895                         ofstream outAll;
896                         m->openOutputFile(sumAllFile, outAll);
897                         
898                         //output label
899                         outAll << thisLookup[0]->getLabel() << '\t';
900                         
901                         //output groups names
902                         string outNames = "";
903                         for (int j = 0; j < thisLookup.size(); j++) {
904                                 outNames += thisLookup[j]->getGroup() +  "-";
905                         }
906                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
907                         outAll << outNames << '\t';
908                         
909                         for(int i=0;i<sumCalculators.size();i++){
910                                 if (sumCalculators[i]->getMultiple() == true) { 
911                                         sumCalculators[i]->getValues(thisLookup);
912                                         
913                                         if (m->control_pressed) { outAll.close(); return 1; }
914                                         
915                                         outAll << '\t';
916                                         sumCalculators[i]->print(outAll);
917                                 }
918                         }
919                         outAll << endl;
920                         outAll.close();
921                 }
922                 
923                 ofstream outputFileHandle;
924                 m->openOutputFile(sumFile, outputFileHandle);
925                 
926                 vector<SharedRAbundVector*> subset;
927                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
928
929                         for (int l = 0; l < k; l++) {
930                                 
931                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
932                                 
933                                 subset.clear(); //clear out old pair of sharedrabunds
934                                 //add new pair of sharedrabunds
935                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
936                                 
937                                 //sort groups to be alphanumeric
938                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
939                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
940                                 }else{
941                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
942                                 }
943                                 
944                                 for(int i=0;i<sumCalculators.size();i++) {
945                                         
946                                         //if this calc needs all groups to calculate the pair load all groups
947                                         if (sumCalculators[i]->getNeedsAll()) { 
948                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
949                                                 for (int w = 0; w < thisLookup.size(); w++) {
950                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
951                                                 }
952                                         }
953                                         
954                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
955                                         
956                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
957                                         
958                                         outputFileHandle << '\t';
959                                         sumCalculators[i]->print(outputFileHandle);
960                                         
961                                         seqDist temp(l, k, tempdata[0]);
962                                         calcDists[i].push_back(temp);
963                                 }
964                                 outputFileHandle << endl;
965                         }
966                 }
967                 
968                 outputFileHandle.close();
969                 
970                 return 0;
971         }
972         catch(exception& e) {
973                 m->errorOut(e, "SummarySharedCommand", "driver");
974                 exit(1);
975         }
976 }
977 /**************************************************************************************************/
978
979