]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
working on pam
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
188                         m->mothurConvert(temp, iters); 
189             
190             output = validParameter.validFile(parameters, "output", false);             
191             if(output == "not found"){  output = "lt"; }
192             else { createPhylip = true; }
193                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
194             
195             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
196                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
197             else {  
198                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
199                 else { subsample = false; }
200             }
201             
202             if (subsample == false) { iters = 0; }
203             
204             temp = validParameter.validFile(parameters, "distance", false);                                     if (temp == "not found") { temp = "false"; }
205                         createPhylip = m->isTrue(temp);
206             if (subsample) { createPhylip = true; }
207             
208                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
209                         m->setProcessors(temp);
210                         m->mothurConvert(temp, processors); 
211                         
212                         if (abort == false) {
213                         
214                                 ValidCalculators validCalculator;
215                                 int i;
216                                 
217                                 for (i=0; i<Estimators.size(); i++) {
218                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
219                                                 if (Estimators[i] == "sharedsobs") { 
220                                                         sumCalculators.push_back(new SharedSobsCS());
221                                                 }else if (Estimators[i] == "sharedchao") { 
222                                                         sumCalculators.push_back(new SharedChao1());
223                                                 }else if (Estimators[i] == "sharedace") { 
224                                                         sumCalculators.push_back(new SharedAce());
225                                                 }else if (Estimators[i] == "jabund") {  
226                                                         sumCalculators.push_back(new JAbund());
227                                                 }else if (Estimators[i] == "sorabund") { 
228                                                         sumCalculators.push_back(new SorAbund());
229                                                 }else if (Estimators[i] == "jclass") { 
230                                                         sumCalculators.push_back(new Jclass());
231                                                 }else if (Estimators[i] == "sorclass") { 
232                                                         sumCalculators.push_back(new SorClass());
233                                                 }else if (Estimators[i] == "jest") { 
234                                                         sumCalculators.push_back(new Jest());
235                                                 }else if (Estimators[i] == "sorest") { 
236                                                         sumCalculators.push_back(new SorEst());
237                                                 }else if (Estimators[i] == "thetayc") { 
238                                                         sumCalculators.push_back(new ThetaYC());
239                                                 }else if (Estimators[i] == "thetan") { 
240                                                         sumCalculators.push_back(new ThetaN());
241                                                 }else if (Estimators[i] == "kstest") { 
242                                                         sumCalculators.push_back(new KSTest());
243                                                 }else if (Estimators[i] == "sharednseqs") { 
244                                                         sumCalculators.push_back(new SharedNSeqs());
245                                                 }else if (Estimators[i] == "ochiai") { 
246                                                         sumCalculators.push_back(new Ochiai());
247                                                 }else if (Estimators[i] == "anderberg") { 
248                                                         sumCalculators.push_back(new Anderberg());
249                                                 }else if (Estimators[i] == "kulczynski") { 
250                                                         sumCalculators.push_back(new Kulczynski());
251                                                 }else if (Estimators[i] == "kulczynskicody") { 
252                                                         sumCalculators.push_back(new KulczynskiCody());
253                                                 }else if (Estimators[i] == "lennon") { 
254                                                         sumCalculators.push_back(new Lennon());
255                                                 }else if (Estimators[i] == "morisitahorn") { 
256                                                         sumCalculators.push_back(new MorHorn());
257                                                 }else if (Estimators[i] == "braycurtis") { 
258                                                         sumCalculators.push_back(new BrayCurtis());
259                                                 }else if (Estimators[i] == "whittaker") { 
260                                                         sumCalculators.push_back(new Whittaker());
261                                                 }else if (Estimators[i] == "odum") { 
262                                                         sumCalculators.push_back(new Odum());
263                                                 }else if (Estimators[i] == "canberra") { 
264                                                         sumCalculators.push_back(new Canberra());
265                                                 }else if (Estimators[i] == "structeuclidean") { 
266                                                         sumCalculators.push_back(new StructEuclidean());
267                                                 }else if (Estimators[i] == "structchord") { 
268                                                         sumCalculators.push_back(new StructChord());
269                                                 }else if (Estimators[i] == "hellinger") { 
270                                                         sumCalculators.push_back(new Hellinger());
271                                                 }else if (Estimators[i] == "manhattan") { 
272                                                         sumCalculators.push_back(new Manhattan());
273                                                 }else if (Estimators[i] == "structpearson") { 
274                                                         sumCalculators.push_back(new StructPearson());
275                                                 }else if (Estimators[i] == "soergel") { 
276                                                         sumCalculators.push_back(new Soergel());
277                                                 }else if (Estimators[i] == "spearman") { 
278                                                         sumCalculators.push_back(new Spearman());
279                                                 }else if (Estimators[i] == "structkulczynski") { 
280                                                         sumCalculators.push_back(new StructKulczynski());
281                                                 }else if (Estimators[i] == "speciesprofile") { 
282                                                         sumCalculators.push_back(new SpeciesProfile());
283                                                 }else if (Estimators[i] == "hamming") { 
284                                                         sumCalculators.push_back(new Hamming());
285                                                 }else if (Estimators[i] == "structchi2") { 
286                                                         sumCalculators.push_back(new StructChi2());
287                                                 }else if (Estimators[i] == "gower") { 
288                                                         sumCalculators.push_back(new Gower());
289                                                 }else if (Estimators[i] == "memchi2") { 
290                                                         sumCalculators.push_back(new MemChi2());
291                                                 }else if (Estimators[i] == "memchord") { 
292                                                         sumCalculators.push_back(new MemChord());
293                                                 }else if (Estimators[i] == "memeuclidean") { 
294                                                         sumCalculators.push_back(new MemEuclidean());
295                                                 }else if (Estimators[i] == "mempearson") { 
296                                                         sumCalculators.push_back(new MemPearson());
297                                                 }else if (Estimators[i] == "jsd") {
298                                                         sumCalculators.push_back(new JSD());
299                                                 }
300                                         }
301                                 }
302                                 
303                                 mult = false;
304                         }
305                 }
306         }
307         catch(exception& e) {
308                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
309                 exit(1);
310         }
311 }
312 //**********************************************************************************************************************
313
314 int SummarySharedCommand::execute(){
315         try {
316         
317                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
318                 
319                 ofstream outputFileHandle, outAll;
320         map<string, string> variables; 
321                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
322                 string outputFileName = getOutputFileName("summary",variables);
323                 
324                 //if the users entered no valid calculators don't execute command
325                 if (sumCalculators.size() == 0) { return 0; }
326                 //check if any calcs can do multiples
327                 else{
328                         if (all){ 
329                                 for (int i = 0; i < sumCalculators.size(); i++) {
330                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
331                                 }
332                         }
333                 }
334                         
335                 input = new InputData(sharedfile, "sharedfile");
336                 lookup = input->getSharedRAbundVectors();
337                 string lastLabel = lookup[0]->getLabel();
338         
339                 /******************************************************/
340                 //output headings for files
341                 /******************************************************/
342                 //output estimator names as column headers
343                 m->openOutputFile(outputFileName, outputFileHandle);
344                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
345                 for(int i=0;i<sumCalculators.size();i++){
346                         outputFileHandle << '\t' << sumCalculators[i]->getName();
347                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
348                 }
349                 outputFileHandle << endl;
350                 outputFileHandle.close();
351                 
352                 //create file and put column headers for multiple groups file
353         variables["[tag]"]= "multiple";
354                 string outAllFileName = getOutputFileName("summary",variables);
355                 if (mult == true) {
356                         m->openOutputFile(outAllFileName, outAll);
357                         outputNames.push_back(outAllFileName);
358                         
359                         outAll << "label" <<'\t' << "comparison" << '\t'; 
360                         for(int i=0;i<sumCalculators.size();i++){
361                                 if (sumCalculators[i]->getMultiple() == true) { 
362                                         outAll << '\t' << sumCalculators[i]->getName();
363                                 }
364                         }
365                         outAll << endl;
366                         outAll.close();
367                 }
368                 
369                 if (lookup.size() < 2) { 
370                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
371                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
372                         
373                         //close files and clean up
374                         m->mothurRemove(outputFileName);
375                         if (mult == true) { m->mothurRemove(outAllFileName);  }
376                         return 0;
377                 //if you only have 2 groups you don't need a .sharedmultiple file
378                 }else if ((lookup.size() == 2) && (mult == true)) { 
379                         mult = false;
380                         m->mothurRemove(outAllFileName);
381                         outputNames.pop_back();
382                 }
383                 
384                 if (m->control_pressed) {
385                         if (mult) {  m->mothurRemove(outAllFileName);  }
386                         m->mothurRemove(outputFileName); 
387                         delete input;
388                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
389                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
390                         m->clearGroups(); 
391                         return 0;
392                 }
393                 /******************************************************/
394         if (subsample) { 
395             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
396                 subsampleSize = lookup[0]->getNumSeqs();
397                 for (int i = 1; i < lookup.size(); i++) {
398                     int thisSize = lookup[i]->getNumSeqs();
399                     
400                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
401                 }
402             }else {
403                 m->clearGroups();
404                 Groups.clear();
405                 vector<SharedRAbundVector*> temp;
406                 for (int i = 0; i < lookup.size(); i++) {
407                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
408                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
409                         delete lookup[i];
410                     }else { 
411                         Groups.push_back(lookup[i]->getGroup()); 
412                         temp.push_back(lookup[i]);
413                     }
414                 } 
415                 lookup = temp;
416                 m->setGroups(Groups);
417             }
418             
419             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
420         }
421
422                 
423                 /******************************************************/
424                 //comparison breakup to be used by different processes later
425                 numGroups = lookup.size();
426                 lines.resize(processors);
427                 for (int i = 0; i < processors; i++) {
428                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
429                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
430                 }               
431                 /******************************************************/
432                 
433                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
434                 set<string> processedLabels;
435                 set<string> userLabels = labels;
436                         
437                 //as long as you are not at the end of the file or done wih the lines you want
438                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
439                         if (m->control_pressed) {
440                                 if (mult) {  m->mothurRemove(outAllFileName);  }
441                                 m->mothurRemove(outputFileName); 
442                                 delete input; 
443                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
444                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
445                                 m->clearGroups(); 
446                                 return 0;
447                         }
448
449                 
450                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
451                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
452                                 process(lookup, outputFileName, outAllFileName);
453                                 
454                                 processedLabels.insert(lookup[0]->getLabel());
455                                 userLabels.erase(lookup[0]->getLabel());
456                         }
457                         
458                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
459                                         string saveLabel = lookup[0]->getLabel();
460                                         
461                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
462                                         lookup = input->getSharedRAbundVectors(lastLabel);
463
464                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
465                                         process(lookup, outputFileName, outAllFileName);
466                                         
467                                         processedLabels.insert(lookup[0]->getLabel());
468                                         userLabels.erase(lookup[0]->getLabel());
469                                         
470                                         //restore real lastlabel to save below
471                                         lookup[0]->setLabel(saveLabel);
472                         }
473                         
474                         lastLabel = lookup[0]->getLabel();                      
475                                 
476                         //get next line to process
477                         //prevent memory leak
478                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
479                         lookup = input->getSharedRAbundVectors();
480                 }
481                 
482                 if (m->control_pressed) {
483                         if (mult) { m->mothurRemove(outAllFileName);  }
484                         m->mothurRemove(outputFileName); 
485                         delete input; 
486                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
487                         m->clearGroups(); 
488                         return 0;
489                 }
490
491                 //output error messages about any remaining user labels
492                 set<string>::iterator it;
493                 bool needToRun = false;
494                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
495                         m->mothurOut("Your file does not include the label " + *it); 
496                         if (processedLabels.count(lastLabel) != 1) {
497                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
498                                 needToRun = true;
499                         }else {
500                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
501                         }
502                 }
503                 
504                 //run last label if you need to
505                 if (needToRun == true)  {
506                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
507                                 lookup = input->getSharedRAbundVectors(lastLabel);
508
509                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
510                                 process(lookup, outputFileName, outAllFileName);
511                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
512                 }
513                 
514                                 
515                 //reset groups parameter
516                 m->clearGroups();  
517                 
518                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
519                 delete input;  
520                 
521                 if (m->control_pressed) {
522                         m->mothurRemove(outAllFileName);  
523                         m->mothurRemove(outputFileName); 
524                         return 0;
525                 }
526                 
527                 m->mothurOutEndLine();
528                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
529                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
530                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
531                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
532                 m->mothurOutEndLine();
533
534                 return 0;
535         }
536         catch(exception& e) {
537                 m->errorOut(e, "SummarySharedCommand", "execute");
538                 exit(1);
539         }
540 }
541 /***********************************************************/
542 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
543         try {
544                 
545                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
546                 
547                 //output num seqs
548                 out << simMatrix.size() << endl;
549                 
550                 if (output == "lt") {
551                         for (int b = 0; b < simMatrix.size(); b++)      {
552                                 out << lookup[b]->getGroup() << '\t';
553                                 for (int n = 0; n < b; n++)     {
554                     if (m->control_pressed) { return 0; }
555                                         out << simMatrix[b][n] << '\t'; 
556                                 }
557                                 out << endl;
558                         }
559                 }else{
560                         for (int b = 0; b < simMatrix.size(); m++)      {
561                                 out << lookup[b]->getGroup() << '\t';
562                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
563                     if (m->control_pressed) { return 0; }
564                                         out << simMatrix[b][n] << '\t'; 
565                                 }
566                                 out << endl;
567                         }
568                 }
569         
570         return 0;
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "SummarySharedCommand", "printSims");
574                 exit(1);
575         }
576 }
577 /***********************************************************/
578 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
579         try {
580         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
581         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
582         
583         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
584             
585             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
586             
587             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
588                 SubSample sample;
589                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
590                 
591                 //make copy of lookup so we don't get access violations
592                 vector<SharedRAbundVector*> newLookup;
593                 for (int k = 0; k < thisItersLookup.size(); k++) {
594                     SharedRAbundVector* temp = new SharedRAbundVector();
595                     temp->setLabel(thisItersLookup[k]->getLabel());
596                     temp->setGroup(thisItersLookup[k]->getGroup());
597                     newLookup.push_back(temp);
598                 }
599                 
600                 //for each bin
601                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
602                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
603                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
604                 }
605                 
606                 tempLabels = sample.getSample(newLookup, subsampleSize);
607                 thisItersLookup = newLookup;
608             }
609         
610             
611             if(processors == 1){
612                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
613                 m->appendFiles((sumFileName + ".temp"), sumFileName);
614                 m->mothurRemove((sumFileName + ".temp"));
615                 if (mult) {
616                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
617                     m->mothurRemove((sumAllFileName + ".temp"));
618                 }
619             }else{
620                 
621                 int process = 1;
622                 vector<int> processIDS;
623                 
624 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
625                 //loop through and create all the processes you want
626                 while (process != processors) {
627                     int pid = fork();
628                     
629                     if (pid > 0) {
630                         processIDS.push_back(pid); 
631                         process++;
632                     }else if (pid == 0){
633                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
634                         
635                         //only do this if you want a distance file
636                         if (createPhylip) {
637                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
638                             ofstream outtemp;
639                             m->openOutputFile(tempdistFileName, outtemp);
640                             
641                             for (int i = 0; i < calcDists.size(); i++) {
642                                 outtemp << calcDists[i].size() << endl;
643                                 
644                                 for (int j = 0; j < calcDists[i].size(); j++) {
645                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
646                                 }
647                             }
648                             outtemp.close();
649                         }
650                         
651                         exit(0);
652                     }else { 
653                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
654                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
655                         exit(0);
656                     }
657                 }
658                 
659                 //parent do your part
660                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
661                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
662                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
663                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
664                 
665                 //force parent to wait until all the processes are done
666                 for (int i = 0; i < processIDS.size(); i++) {
667                     int temp = processIDS[i];
668                     wait(&temp);
669                 }
670                 
671                 for (int i = 0; i < processIDS.size(); i++) {
672                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
673                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
674                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
675                     
676                     if (createPhylip) {
677                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
678                         ifstream intemp;
679                         m->openInputFile(tempdistFileName, intemp);
680                         
681                         for (int k = 0; k < calcDists.size(); k++) {
682                             int size = 0;
683                             intemp >> size; m->gobble(intemp);
684                             
685                             for (int j = 0; j < size; j++) {
686                                 int seq1 = 0;
687                                 int seq2 = 0;
688                                 float dist = 1.0;
689                                 
690                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
691                                 
692                                 seqDist tempDist(seq1, seq2, dist);
693                                 calcDists[k].push_back(tempDist);
694                             }
695                         }
696                         intemp.close();
697                         m->mothurRemove(tempdistFileName);
698                     }
699                 }
700 #else
701                 //////////////////////////////////////////////////////////////////////////////////////////////////////
702                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
703                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
704                 //Taking advantage of shared memory to pass results vectors.
705                 //////////////////////////////////////////////////////////////////////////////////////////////////////
706                 
707                 vector<summarySharedData*> pDataArray; 
708                 DWORD   dwThreadIdArray[processors-1];
709                 HANDLE  hThreadArray[processors-1];
710                 
711                 //Create processor worker threads.
712                 for( int i=1; i<processors; i++ ){
713                     
714                     //make copy of lookup so we don't get access violations
715                     vector<SharedRAbundVector*> newLookup;
716                     for (int k = 0; k < thisLookup.size(); k++) {
717                         SharedRAbundVector* temp = new SharedRAbundVector();
718                         temp->setLabel(thisLookup[k]->getLabel());
719                         temp->setGroup(thisLookup[k]->getGroup());
720                         newLookup.push_back(temp);
721                     }
722                 
723                     
724                     //for each bin
725                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
726                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
727                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
728                     }
729                     
730                     // Allocate memory for thread data.
731                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
732                     pDataArray.push_back(tempSum);
733                     processIDS.push_back(i);
734                     
735                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
736                 }
737                 
738                 //parent do your part
739                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
740                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
741                 m->mothurRemove((sumFileName + "0.temp"));
742                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
743                 
744                 //Wait until all threads have terminated.
745                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
746                 
747                 //Close all thread handles and free memory allocations.
748                 for(int i=0; i < pDataArray.size(); i++){
749                     if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
750                         m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
751                     }
752                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
753                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
754                     
755                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
756                     
757                     if (createPhylip) {
758                         for (int k = 0; k < calcDists.size(); k++) {
759                             int size = pDataArray[i]->calcDists[k].size();
760                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
761                         }
762                     }
763                     
764                     CloseHandle(hThreadArray[i]);
765                     delete pDataArray[i];
766                 }
767                 
768 #endif
769             }
770             
771             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
772                 
773                 calcDistsTotals.push_back(calcDists); 
774                 //clean up memory
775                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
776                 thisItersLookup.clear();
777             }else {
778                 if (createPhylip) {
779                     for (int i = 0; i < calcDists.size(); i++) {
780                         if (m->control_pressed) { break; }
781                         
782                         //initialize matrix
783                         vector< vector<double> > matrix; //square matrix to represent the distance
784                         matrix.resize(thisLookup.size());
785                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
786                         
787                         for (int j = 0; j < calcDists[i].size(); j++) {
788                             int row = calcDists[i][j].seq1;
789                             int column = calcDists[i][j].seq2;
790                             double dist = calcDists[i][j].dist;
791                             
792                             matrix[row][column] = dist;
793                             matrix[column][row] = dist;
794                         }
795                         
796                         map<string, string> variables; 
797                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
798                         variables["[calc]"] = sumCalculators[i]->getName();
799                         variables["[distance]"] = thisLookup[0]->getLabel();
800                         variables["[outputtag]"] = output;
801                         variables["[tag2]"] = "";
802                         string distFileName = getOutputFileName("phylip",variables);
803                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
804                         ofstream outDist;
805                         m->openOutputFile(distFileName, outDist);
806                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
807                         
808                         printSims(outDist, matrix);
809                         
810                         outDist.close();
811                     }
812                 }
813             }
814             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
815                 }
816
817         if (iters != 0) {
818             //we need to find the average distance and standard deviation for each groups distance
819             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
820             
821             //find standard deviation
822             vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
823             
824             //print results
825             for (int i = 0; i < calcDists.size(); i++) {
826                 vector< vector<double> > matrix; //square matrix to represent the distance
827                 matrix.resize(thisLookup.size());
828                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
829                 
830                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
831                 stdmatrix.resize(thisLookup.size());
832                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
833                 
834                 
835                 for (int j = 0; j < calcAverages[i].size(); j++) {
836                     int row = calcAverages[i][j].seq1;
837                     int column = calcAverages[i][j].seq2;
838                     float dist = calcAverages[i][j].dist;
839                     float stdDist = stdDev[i][j].dist;
840                     
841                     matrix[row][column] = dist;
842                     matrix[column][row] = dist;
843                     stdmatrix[row][column] = stdDist;
844                     stdmatrix[column][row] = stdDist;
845                 }
846                 
847                 map<string, string> variables; 
848                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
849                 variables["[calc]"] = sumCalculators[i]->getName();
850                 variables["[distance]"] = thisLookup[0]->getLabel();
851                 variables["[outputtag]"] = output;
852                 variables["[tag2]"] = "ave";
853                 string distFileName = getOutputFileName("phylip",variables);
854                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
855                 ofstream outAve;
856                 m->openOutputFile(distFileName, outAve);
857                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
858                 
859                 printSims(outAve, matrix);
860                 
861                 outAve.close();
862                 
863                 variables["[tag2]"] = "std";
864                 distFileName = getOutputFileName("phylip",variables);
865                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
866                 ofstream outSTD;
867                 m->openOutputFile(distFileName, outSTD);
868                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
869                 
870                 printSims(outSTD, stdmatrix);
871                 
872                 outSTD.close();
873                 
874             }
875         }
876         
877         return 0;
878         }
879         catch(exception& e) {
880                 m->errorOut(e, "SummarySharedCommand", "process");
881                 exit(1);
882         }
883 }
884 /**************************************************************************************************/
885 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
886         try {
887                 
888                 //loop through calculators and add to file all for all calcs that can do mutiple groups
889                 if (mult == true) {
890                         ofstream outAll;
891                         m->openOutputFile(sumAllFile, outAll);
892                         
893                         //output label
894                         outAll << thisLookup[0]->getLabel() << '\t';
895                         
896                         //output groups names
897                         string outNames = "";
898                         for (int j = 0; j < thisLookup.size(); j++) {
899                                 outNames += thisLookup[j]->getGroup() +  "-";
900                         }
901                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
902                         outAll << outNames << '\t';
903                         
904                         for(int i=0;i<sumCalculators.size();i++){
905                                 if (sumCalculators[i]->getMultiple() == true) { 
906                                         sumCalculators[i]->getValues(thisLookup);
907                                         
908                                         if (m->control_pressed) { outAll.close(); return 1; }
909                                         
910                                         outAll << '\t';
911                                         sumCalculators[i]->print(outAll);
912                                 }
913                         }
914                         outAll << endl;
915                         outAll.close();
916                 }
917                 
918                 ofstream outputFileHandle;
919                 m->openOutputFile(sumFile, outputFileHandle);
920                 
921                 vector<SharedRAbundVector*> subset;
922                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
923
924                         for (int l = 0; l < k; l++) {
925                                 
926                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
927                                 
928                                 subset.clear(); //clear out old pair of sharedrabunds
929                                 //add new pair of sharedrabunds
930                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
931                                 
932                                 //sort groups to be alphanumeric
933                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
934                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
935                                 }else{
936                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
937                                 }
938                                 
939                                 for(int i=0;i<sumCalculators.size();i++) {
940                                         
941                                         //if this calc needs all groups to calculate the pair load all groups
942                                         if (sumCalculators[i]->getNeedsAll()) { 
943                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
944                                                 for (int w = 0; w < thisLookup.size(); w++) {
945                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
946                                                 }
947                                         }
948                                         
949                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
950                                         
951                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
952                                         
953                                         outputFileHandle << '\t';
954                                         sumCalculators[i]->print(outputFileHandle);
955                                         
956                                         seqDist temp(l, k, tempdata[0]);
957                                         calcDists[i].push_back(temp);
958                                 }
959                                 outputFileHandle << endl;
960                         }
961                 }
962                 
963                 outputFileHandle.close();
964                 
965                 return 0;
966         }
967         catch(exception& e) {
968                 m->errorOut(e, "SummarySharedCommand", "driver");
969                 exit(1);
970         }
971 }
972 /**************************************************************************************************/
973
974