]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 SummarySharedCommand::SummarySharedCommand(){   
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["summary"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
76                 exit(1);
77         }
78 }
79 //**********************************************************************************************************************
80
81 SummarySharedCommand::SummarySharedCommand(string option)  {
82         try {
83                 abort = false; calledHelp = false;   
84                 allLines = 1;
85                                 
86                 //allow user to run help
87                 if(option == "help") {  help(); abort = true; calledHelp = true; }
88                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
89                 
90                 else {
91                         vector<string> myArray = setParameters();
92                         
93                         OptionParser parser(option);
94                         map<string, string> parameters = parser.getParameters();
95                         map<string, string>::iterator it;
96                         
97                         ValidParameters validParameter;
98                 
99                         //check to make sure all parameters are valid for command
100                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["summary"] = tempOutNames;
107                         
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("shared");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
119                                 }
120                         }
121                         
122                         //get shared file
123                         sharedfile = validParameter.validFile(parameters, "shared", true);
124                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
125                         else if (sharedfile == "not found") { 
126                                 //if there is a current shared file, use it
127                                 sharedfile = m->getSharedFile(); 
128                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
129                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
130                         }else { m->setSharedFile(sharedfile); }
131                         
132                         
133                         //if the user changes the output directory command factory will send this info to us in the output parameter 
134                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
135                         
136
137                         //check for optional parameter and set defaults
138                         // ...at some point should added some additional type checking...
139                         label = validParameter.validFile(parameters, "label", false);                   
140                         if (label == "not found") { label = ""; }
141                         else { 
142                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
143                                 else { allLines = 1;  }
144                         }
145                         
146                                         
147                         calc = validParameter.validFile(parameters, "calc", false);                     
148                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
149                         else { 
150                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
151                         }
152                         m->splitAtDash(calc, Estimators);
153                         if (m->inUsersGroups("citation", Estimators)) { 
154                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
155                                 //remove citation from list of calcs
156                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
157                         }
158                         
159                         groups = validParameter.validFile(parameters, "groups", false);                 
160                         if (groups == "not found") { groups = ""; }
161                         else { 
162                                 m->splitAtDash(groups, Groups);
163                                 m->setGroups(Groups);
164                         }
165                         
166                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
167                         all = m->isTrue(temp);
168                         
169                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
170                         createPhylip = m->isTrue(temp);
171                         
172             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
173                         m->mothurConvert(temp, iters); 
174             
175             output = validParameter.validFile(parameters, "output", false);             if(output == "not found"){      output = "lt"; }
176                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
177             
178             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
179                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
180             else {  
181                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
182                 else { subsample = false; }
183             }
184             
185             if (subsample == false) { iters = 1; }
186             
187                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
188                         m->setProcessors(temp);
189                         m->mothurConvert(temp, processors); 
190                         
191                         if (abort == false) {
192                         
193                                 ValidCalculators validCalculator;
194                                 int i;
195                                 
196                                 for (i=0; i<Estimators.size(); i++) {
197                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
198                                                 if (Estimators[i] == "sharedsobs") { 
199                                                         sumCalculators.push_back(new SharedSobsCS());
200                                                 }else if (Estimators[i] == "sharedchao") { 
201                                                         sumCalculators.push_back(new SharedChao1());
202                                                 }else if (Estimators[i] == "sharedace") { 
203                                                         sumCalculators.push_back(new SharedAce());
204                                                 }else if (Estimators[i] == "jabund") {  
205                                                         sumCalculators.push_back(new JAbund());
206                                                 }else if (Estimators[i] == "sorabund") { 
207                                                         sumCalculators.push_back(new SorAbund());
208                                                 }else if (Estimators[i] == "jclass") { 
209                                                         sumCalculators.push_back(new Jclass());
210                                                 }else if (Estimators[i] == "sorclass") { 
211                                                         sumCalculators.push_back(new SorClass());
212                                                 }else if (Estimators[i] == "jest") { 
213                                                         sumCalculators.push_back(new Jest());
214                                                 }else if (Estimators[i] == "sorest") { 
215                                                         sumCalculators.push_back(new SorEst());
216                                                 }else if (Estimators[i] == "thetayc") { 
217                                                         sumCalculators.push_back(new ThetaYC());
218                                                 }else if (Estimators[i] == "thetan") { 
219                                                         sumCalculators.push_back(new ThetaN());
220                                                 }else if (Estimators[i] == "kstest") { 
221                                                         sumCalculators.push_back(new KSTest());
222                                                 }else if (Estimators[i] == "sharednseqs") { 
223                                                         sumCalculators.push_back(new SharedNSeqs());
224                                                 }else if (Estimators[i] == "ochiai") { 
225                                                         sumCalculators.push_back(new Ochiai());
226                                                 }else if (Estimators[i] == "anderberg") { 
227                                                         sumCalculators.push_back(new Anderberg());
228                                                 }else if (Estimators[i] == "kulczynski") { 
229                                                         sumCalculators.push_back(new Kulczynski());
230                                                 }else if (Estimators[i] == "kulczynskicody") { 
231                                                         sumCalculators.push_back(new KulczynskiCody());
232                                                 }else if (Estimators[i] == "lennon") { 
233                                                         sumCalculators.push_back(new Lennon());
234                                                 }else if (Estimators[i] == "morisitahorn") { 
235                                                         sumCalculators.push_back(new MorHorn());
236                                                 }else if (Estimators[i] == "braycurtis") { 
237                                                         sumCalculators.push_back(new BrayCurtis());
238                                                 }else if (Estimators[i] == "whittaker") { 
239                                                         sumCalculators.push_back(new Whittaker());
240                                                 }else if (Estimators[i] == "odum") { 
241                                                         sumCalculators.push_back(new Odum());
242                                                 }else if (Estimators[i] == "canberra") { 
243                                                         sumCalculators.push_back(new Canberra());
244                                                 }else if (Estimators[i] == "structeuclidean") { 
245                                                         sumCalculators.push_back(new StructEuclidean());
246                                                 }else if (Estimators[i] == "structchord") { 
247                                                         sumCalculators.push_back(new StructChord());
248                                                 }else if (Estimators[i] == "hellinger") { 
249                                                         sumCalculators.push_back(new Hellinger());
250                                                 }else if (Estimators[i] == "manhattan") { 
251                                                         sumCalculators.push_back(new Manhattan());
252                                                 }else if (Estimators[i] == "structpearson") { 
253                                                         sumCalculators.push_back(new StructPearson());
254                                                 }else if (Estimators[i] == "soergel") { 
255                                                         sumCalculators.push_back(new Soergel());
256                                                 }else if (Estimators[i] == "spearman") { 
257                                                         sumCalculators.push_back(new Spearman());
258                                                 }else if (Estimators[i] == "structkulczynski") { 
259                                                         sumCalculators.push_back(new StructKulczynski());
260                                                 }else if (Estimators[i] == "speciesprofile") { 
261                                                         sumCalculators.push_back(new SpeciesProfile());
262                                                 }else if (Estimators[i] == "hamming") { 
263                                                         sumCalculators.push_back(new Hamming());
264                                                 }else if (Estimators[i] == "structchi2") { 
265                                                         sumCalculators.push_back(new StructChi2());
266                                                 }else if (Estimators[i] == "gower") { 
267                                                         sumCalculators.push_back(new Gower());
268                                                 }else if (Estimators[i] == "memchi2") { 
269                                                         sumCalculators.push_back(new MemChi2());
270                                                 }else if (Estimators[i] == "memchord") { 
271                                                         sumCalculators.push_back(new MemChord());
272                                                 }else if (Estimators[i] == "memeuclidean") { 
273                                                         sumCalculators.push_back(new MemEuclidean());
274                                                 }else if (Estimators[i] == "mempearson") { 
275                                                         sumCalculators.push_back(new MemPearson());
276                                                 }
277                                         }
278                                 }
279                                 
280                                 mult = false;
281                         }
282                 }
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int SummarySharedCommand::execute(){
292         try {
293         
294                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
295                 
296                 ofstream outputFileHandle, outAll;
297                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
298                 
299                 //if the users entered no valid calculators don't execute command
300                 if (sumCalculators.size() == 0) { return 0; }
301                 //check if any calcs can do multiples
302                 else{
303                         if (all){ 
304                                 for (int i = 0; i < sumCalculators.size(); i++) {
305                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
306                                 }
307                         }
308                 }
309                         
310                 input = new InputData(sharedfile, "sharedfile");
311                 lookup = input->getSharedRAbundVectors();
312                 string lastLabel = lookup[0]->getLabel();
313         
314                 /******************************************************/
315                 //output headings for files
316                 /******************************************************/
317                 //output estimator names as column headers
318                 m->openOutputFile(outputFileName, outputFileHandle);
319                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
320                 for(int i=0;i<sumCalculators.size();i++){
321                         outputFileHandle << '\t' << sumCalculators[i]->getName();
322                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
323                 }
324                 outputFileHandle << endl;
325                 outputFileHandle.close();
326                 
327                 //create file and put column headers for multiple groups file
328                 string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
329                 if (mult == true) {
330                         m->openOutputFile(outAllFileName, outAll);
331                         outputNames.push_back(outAllFileName);
332                         
333                         outAll << "label" <<'\t' << "comparison" << '\t'; 
334                         for(int i=0;i<sumCalculators.size();i++){
335                                 if (sumCalculators[i]->getMultiple() == true) { 
336                                         outAll << '\t' << sumCalculators[i]->getName();
337                                 }
338                         }
339                         outAll << endl;
340                         outAll.close();
341                 }
342                 
343                 if (lookup.size() < 2) { 
344                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
345                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
346                         
347                         //close files and clean up
348                         m->mothurRemove(outputFileName);
349                         if (mult == true) { m->mothurRemove(outAllFileName);  }
350                         return 0;
351                 //if you only have 2 groups you don't need a .sharedmultiple file
352                 }else if ((lookup.size() == 2) && (mult == true)) { 
353                         mult = false;
354                         m->mothurRemove(outAllFileName);
355                         outputNames.pop_back();
356                 }
357                 
358                 if (m->control_pressed) {
359                         if (mult) {  m->mothurRemove(outAllFileName);  }
360                         m->mothurRemove(outputFileName); 
361                         delete input;
362                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
363                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
364                         m->clearGroups(); 
365                         return 0;
366                 }
367                 /******************************************************/
368                 
369                 
370                 /******************************************************/
371                 //comparison breakup to be used by different processes later
372                 numGroups = m->getNumGroups();
373                 lines.resize(processors);
374                 for (int i = 0; i < processors; i++) {
375                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
376                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
377                 }               
378                 /******************************************************/
379                 
380                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
381                 set<string> processedLabels;
382                 set<string> userLabels = labels;
383                         
384                 //as long as you are not at the end of the file or done wih the lines you want
385                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
386                         if (m->control_pressed) {
387                                 if (mult) {  m->mothurRemove(outAllFileName);  }
388                                 m->mothurRemove(outputFileName); 
389                                 delete input; 
390                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
391                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
392                                 m->clearGroups(); 
393                                 return 0;
394                         }
395
396                 
397                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
398                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
399                                 process(lookup, outputFileName, outAllFileName);
400                                 
401                                 processedLabels.insert(lookup[0]->getLabel());
402                                 userLabels.erase(lookup[0]->getLabel());
403                         }
404                         
405                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
406                                         string saveLabel = lookup[0]->getLabel();
407                                         
408                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
409                                         lookup = input->getSharedRAbundVectors(lastLabel);
410
411                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
412                                         process(lookup, outputFileName, outAllFileName);
413                                         
414                                         processedLabels.insert(lookup[0]->getLabel());
415                                         userLabels.erase(lookup[0]->getLabel());
416                                         
417                                         //restore real lastlabel to save below
418                                         lookup[0]->setLabel(saveLabel);
419                         }
420                         
421                         lastLabel = lookup[0]->getLabel();                      
422                                 
423                         //get next line to process
424                         //prevent memory leak
425                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
426                         lookup = input->getSharedRAbundVectors();
427                 }
428                 
429                 if (m->control_pressed) {
430                         if (mult) { m->mothurRemove(outAllFileName);  }
431                         m->mothurRemove(outputFileName); 
432                         delete input; 
433                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
434                         m->clearGroups(); 
435                         return 0;
436                 }
437
438                 //output error messages about any remaining user labels
439                 set<string>::iterator it;
440                 bool needToRun = false;
441                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
442                         m->mothurOut("Your file does not include the label " + *it); 
443                         if (processedLabels.count(lastLabel) != 1) {
444                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
445                                 needToRun = true;
446                         }else {
447                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
448                         }
449                 }
450                 
451                 //run last label if you need to
452                 if (needToRun == true)  {
453                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
454                                 lookup = input->getSharedRAbundVectors(lastLabel);
455
456                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
457                                 process(lookup, outputFileName, outAllFileName);
458                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                 }
460                 
461                                 
462                 //reset groups parameter
463                 m->clearGroups();  
464                 
465                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
466                 delete input;  
467                 
468                 if (m->control_pressed) {
469                         m->mothurRemove(outAllFileName);  
470                         m->mothurRemove(outputFileName); 
471                         return 0;
472                 }
473                 
474                 m->mothurOutEndLine();
475                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
476                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
477                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
478                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
479                 m->mothurOutEndLine();
480
481                 return 0;
482         }
483         catch(exception& e) {
484                 m->errorOut(e, "SummarySharedCommand", "execute");
485                 exit(1);
486         }
487 }
488 /***********************************************************/
489 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
490         try {
491                 
492                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
493                 
494                 //output num seqs
495                 out << simMatrix.size() << endl;
496                 
497                 if (output == "lt") {
498                         for (int b = 0; b < simMatrix.size(); b++)      {
499                                 out << lookup[b]->getGroup() << '\t';
500                                 for (int n = 0; n < b; n++)     {
501                     if (m->control_pressed) { return 0; }
502                                         out << simMatrix[b][n] << '\t'; 
503                                 }
504                                 out << endl;
505                         }
506                 }else{
507                         for (int b = 0; b < simMatrix.size(); m++)      {
508                                 out << lookup[b]->getGroup() << '\t';
509                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
510                     if (m->control_pressed) { return 0; }
511                                         out << simMatrix[b][n] << '\t'; 
512                                 }
513                                 out << endl;
514                         }
515                 }
516         
517         return 0;
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "SummarySharedCommand", "printSims");
521                 exit(1);
522         }
523 }
524 /***********************************************************/
525 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
526         try {
527         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
528         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
529         
530         for (int thisIter = 0; thisIter < iters; thisIter++) {
531             
532             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
533             
534             if (subsample) {
535                 SubSample sample;
536                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
537                 
538                 //make copy of lookup so we don't get access violations
539                 vector<SharedRAbundVector*> newLookup;
540                 for (int k = 0; k < thisItersLookup.size(); k++) {
541                     SharedRAbundVector* temp = new SharedRAbundVector();
542                     temp->setLabel(thisItersLookup[k]->getLabel());
543                     temp->setGroup(thisItersLookup[k]->getGroup());
544                     newLookup.push_back(temp);
545                 }
546                 
547                 //for each bin
548                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
549                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
550                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
551                 }
552                 
553                 tempLabels = sample.getSample(newLookup, subsampleSize);
554                 thisItersLookup = newLookup;
555             }
556         
557             
558             if(processors == 1){
559                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
560                 m->appendFiles((sumFileName + ".temp"), sumFileName);
561                 m->mothurRemove((sumFileName + ".temp"));
562                 if (mult) {
563                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
564                     m->mothurRemove((sumAllFileName + ".temp"));
565                 }
566             }else{
567                 
568                 int process = 1;
569                 vector<int> processIDS;
570                 
571 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
572                 //loop through and create all the processes you want
573                 while (process != processors) {
574                     int pid = fork();
575                     
576                     if (pid > 0) {
577                         processIDS.push_back(pid); 
578                         process++;
579                     }else if (pid == 0){
580                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
581                         
582                         //only do this if you want a distance file
583                         if (createPhylip) {
584                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
585                             ofstream outtemp;
586                             m->openOutputFile(tempdistFileName, outtemp);
587                             
588                             for (int i = 0; i < calcDists.size(); i++) {
589                                 outtemp << calcDists[i].size() << endl;
590                                 
591                                 for (int j = 0; j < calcDists[i].size(); j++) {
592                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
593                                 }
594                             }
595                             outtemp.close();
596                         }
597                         
598                         exit(0);
599                     }else { 
600                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
601                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
602                         exit(0);
603                     }
604                 }
605                 
606                 //parent do your part
607                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
608                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
609                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
610                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
611                 
612                 //force parent to wait until all the processes are done
613                 for (int i = 0; i < processIDS.size(); i++) {
614                     int temp = processIDS[i];
615                     wait(&temp);
616                 }
617                 
618                 for (int i = 0; i < processIDS.size(); i++) {
619                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
620                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
621                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
622                     
623                     if (createPhylip) {
624                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
625                         ifstream intemp;
626                         m->openInputFile(tempdistFileName, intemp);
627                         
628                         for (int k = 0; k < calcDists.size(); k++) {
629                             int size = 0;
630                             intemp >> size; m->gobble(intemp);
631                             
632                             for (int j = 0; j < size; j++) {
633                                 int seq1 = 0;
634                                 int seq2 = 0;
635                                 float dist = 1.0;
636                                 
637                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
638                                 
639                                 seqDist tempDist(seq1, seq2, dist);
640                                 calcDists[k].push_back(tempDist);
641                             }
642                         }
643                         intemp.close();
644                         m->mothurRemove(tempdistFileName);
645                     }
646                 }
647 #else
648                 //////////////////////////////////////////////////////////////////////////////////////////////////////
649                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
650                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
651                 //Taking advantage of shared memory to pass results vectors.
652                 //////////////////////////////////////////////////////////////////////////////////////////////////////
653                 
654                 vector<summarySharedData*> pDataArray; 
655                 DWORD   dwThreadIdArray[processors-1];
656                 HANDLE  hThreadArray[processors-1]; 
657                 
658                 //Create processor worker threads.
659                 for( int i=1; i<processors; i++ ){
660                     
661                     //make copy of lookup so we don't get access violations
662                     vector<SharedRAbundVector*> newLookup;
663                     for (int k = 0; k < thisLookup.size(); k++) {
664                         SharedRAbundVector* temp = new SharedRAbundVector();
665                         temp->setLabel(thisLookup[k]->getLabel());
666                         temp->setGroup(thisLookup[k]->getGroup());
667                         newLookup.push_back(temp);
668                     }
669                     
670                     //for each bin
671                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
672                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
673                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
674                     }
675                     
676                     // Allocate memory for thread data.
677                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
678                     pDataArray.push_back(tempSum);
679                     processIDS.push_back(i);
680                     
681                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
682                 }
683                 
684                 //parent do your part
685                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
686                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
687                 m->mothurRemove((sumFileName + "0.temp"));
688                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
689                 
690                 //Wait until all threads have terminated.
691                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
692                 
693                 //Close all thread handles and free memory allocations.
694                 for(int i=0; i < pDataArray.size(); i++){
695                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
696                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
697                     
698                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
699                     
700                     if (createPhylip) {
701                         for (int k = 0; k < calcDists.size(); k++) {
702                             int size = pDataArray[i]->calcDists[k].size();
703                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
704                         }
705                     }
706                     
707                     CloseHandle(hThreadArray[i]);
708                     delete pDataArray[i];
709                 }
710                 
711 #endif
712             }
713             calcDistsTotals.push_back(calcDists);
714             
715             if (subsample) {  
716                 
717                 //clean up memory
718                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
719                 thisItersLookup.clear();
720                 for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
721             }
722                 }
723
724         if (iters != 1) {
725             //we need to find the average distance and standard deviation for each groups distance
726             
727             vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
728             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
729                 calcAverages[i].resize(calcDistsTotals[0][i].size());
730                 
731                 for (int j = 0; j < calcAverages[i].size(); j++) {
732                     calcAverages[i][j].seq1 = calcDists[i][j].seq1;
733                     calcAverages[i][j].seq2 = calcDists[i][j].seq2;
734                     calcAverages[i][j].dist = 0.0;
735                 }
736             }
737             
738             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
739                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
740                     for (int j = 0; j < calcAverages[i].size(); j++) {
741                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
742                     }
743                 }
744             }
745             
746             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
747                 for (int j = 0; j < calcAverages[i].size(); j++) {
748                     calcAverages[i][j].dist /= (float) iters;
749                 }
750             }
751             
752             //find standard deviation
753             vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
754             for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
755                 stdDev[i].resize(calcDistsTotals[0][i].size());
756                 
757                 for (int j = 0; j < stdDev[i].size(); j++) {
758                     stdDev[i][j].seq1 = calcDists[i][j].seq1;
759                     stdDev[i][j].seq2 = calcDists[i][j].seq2;
760                     stdDev[i][j].dist = 0.0;
761                 }
762             }
763             
764             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
765                 for (int i = 0; i < stdDev.size(); i++) {  
766                     for (int j = 0; j < stdDev[i].size(); j++) {
767                         stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
768                     }
769                 }
770             }
771             
772             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
773                 for (int j = 0; j < stdDev[i].size(); j++) {
774                     stdDev[i][j].dist /= (float) iters;
775                     stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
776                 }
777             }
778             
779             //print results
780             for (int i = 0; i < calcDists.size(); i++) {
781                 vector< vector<double> > matrix; //square matrix to represent the distance
782                 matrix.resize(thisLookup.size());
783                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
784                 
785                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
786                 stdmatrix.resize(thisLookup.size());
787                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
788                 
789                 
790                 for (int j = 0; j < calcAverages[i].size(); j++) {
791                     int row = calcAverages[i][j].seq1;
792                     int column = calcAverages[i][j].seq2;
793                     float dist = calcAverages[i][j].dist;
794                     float stdDist = stdDev[i][j].dist;
795                     
796                     matrix[row][column] = dist;
797                     matrix[column][row] = dist;
798                     stdmatrix[row][column] = stdDist;
799                     stdmatrix[column][row] = stdDist;
800                 }
801                 
802                 string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".ave.dist";
803                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
804                 ofstream outAve;
805                 m->openOutputFile(distFileName, outAve);
806                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
807                 
808                 printSims(outAve, matrix);
809                 
810                 outAve.close();
811                 
812                 distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".std.dist";
813                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
814                 ofstream outSTD;
815                 m->openOutputFile(distFileName, outSTD);
816                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
817                 
818                 printSims(outSTD, stdmatrix);
819                 
820                 outSTD.close();
821                 
822             }
823         }else {
824             if (createPhylip) {
825                 for (int i = 0; i < calcDists.size(); i++) {
826                     if (m->control_pressed) { break; }
827                     
828                     //initialize matrix
829                     vector< vector<double> > matrix; //square matrix to represent the distance
830                     matrix.resize(thisLookup.size());
831                     for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
832                     
833                     for (int j = 0; j < calcDists[i].size(); j++) {
834                         int row = calcDists[i][j].seq1;
835                         int column = calcDists[i][j].seq2;
836                         double dist = calcDists[i][j].dist;
837                         
838                         matrix[row][column] = dist;
839                         matrix[column][row] = dist;
840                     }
841                     
842                     string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel()  + "." + output + ".dist";
843                     outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
844                     ofstream outDist;
845                     m->openOutputFile(distFileName, outDist);
846                     outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
847                     
848                     printSims(outDist, matrix);
849                     
850                     outDist.close();
851                 }
852             }
853         }
854
855         return 0;
856         }
857         catch(exception& e) {
858                 m->errorOut(e, "SummarySharedCommand", "process");
859                 exit(1);
860         }
861 }
862 /**************************************************************************************************/
863 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
864         try {
865                 
866                 //loop through calculators and add to file all for all calcs that can do mutiple groups
867                 if (mult == true) {
868                         ofstream outAll;
869                         m->openOutputFile(sumAllFile, outAll);
870                         
871                         //output label
872                         outAll << thisLookup[0]->getLabel() << '\t';
873                         
874                         //output groups names
875                         string outNames = "";
876                         for (int j = 0; j < thisLookup.size(); j++) {
877                                 outNames += thisLookup[j]->getGroup() +  "-";
878                         }
879                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
880                         outAll << outNames << '\t';
881                         
882                         for(int i=0;i<sumCalculators.size();i++){
883                                 if (sumCalculators[i]->getMultiple() == true) { 
884                                         sumCalculators[i]->getValues(thisLookup);
885                                         
886                                         if (m->control_pressed) { outAll.close(); return 1; }
887                                         
888                                         outAll << '\t';
889                                         sumCalculators[i]->print(outAll);
890                                 }
891                         }
892                         outAll << endl;
893                         outAll.close();
894                 }
895                 
896                 ofstream outputFileHandle;
897                 m->openOutputFile(sumFile, outputFileHandle);
898                 
899                 vector<SharedRAbundVector*> subset;
900                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
901
902                         for (int l = 0; l < k; l++) {
903                                 
904                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
905                                 
906                                 subset.clear(); //clear out old pair of sharedrabunds
907                                 //add new pair of sharedrabunds
908                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
909                                 
910                                 //sort groups to be alphanumeric
911                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
912                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
913                                 }else{
914                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
915                                 }
916                                 
917                                 for(int i=0;i<sumCalculators.size();i++) {
918                                         
919                                         //if this calc needs all groups to calculate the pair load all groups
920                                         if (sumCalculators[i]->getNeedsAll()) { 
921                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
922                                                 for (int w = 0; w < thisLookup.size(); w++) {
923                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
924                                                 }
925                                         }
926                                         
927                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
928                                         
929                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
930                                         
931                                         outputFileHandle << '\t';
932                                         sumCalculators[i]->print(outputFileHandle);
933                                         
934                                         seqDist temp(l, k, tempdata[0]);
935                                         calcDists[i].push_back(temp);
936                                 }
937                                 outputFileHandle << endl;
938                         }
939                 }
940                 
941                 outputFileHandle.close();
942                 
943                 return 0;
944         }
945         catch(exception& e) {
946                 m->errorOut(e, "SummarySharedCommand", "driver");
947                 exit(1);
948         }
949 }
950 /**************************************************************************************************/
951
952