]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
188                         createPhylip = m->isTrue(temp);
189                         
190             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
191                         m->mothurConvert(temp, iters); 
192             
193             output = validParameter.validFile(parameters, "output", false);             
194             if(output == "not found"){  output = "lt"; }
195             else { createPhylip = true; }
196                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
197             
198             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
199                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
200             else {  
201                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
202                 else { subsample = false; }
203             }
204             
205             if (subsample == false) { iters = 0; }
206             
207                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
208                         m->setProcessors(temp);
209                         m->mothurConvert(temp, processors); 
210                         
211                         if (abort == false) {
212                         
213                                 ValidCalculators validCalculator;
214                                 int i;
215                                 
216                                 for (i=0; i<Estimators.size(); i++) {
217                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
218                                                 if (Estimators[i] == "sharedsobs") { 
219                                                         sumCalculators.push_back(new SharedSobsCS());
220                                                 }else if (Estimators[i] == "sharedchao") { 
221                                                         sumCalculators.push_back(new SharedChao1());
222                                                 }else if (Estimators[i] == "sharedace") { 
223                                                         sumCalculators.push_back(new SharedAce());
224                                                 }else if (Estimators[i] == "jabund") {  
225                                                         sumCalculators.push_back(new JAbund());
226                                                 }else if (Estimators[i] == "sorabund") { 
227                                                         sumCalculators.push_back(new SorAbund());
228                                                 }else if (Estimators[i] == "jclass") { 
229                                                         sumCalculators.push_back(new Jclass());
230                                                 }else if (Estimators[i] == "sorclass") { 
231                                                         sumCalculators.push_back(new SorClass());
232                                                 }else if (Estimators[i] == "jest") { 
233                                                         sumCalculators.push_back(new Jest());
234                                                 }else if (Estimators[i] == "sorest") { 
235                                                         sumCalculators.push_back(new SorEst());
236                                                 }else if (Estimators[i] == "thetayc") { 
237                                                         sumCalculators.push_back(new ThetaYC());
238                                                 }else if (Estimators[i] == "thetan") { 
239                                                         sumCalculators.push_back(new ThetaN());
240                                                 }else if (Estimators[i] == "kstest") { 
241                                                         sumCalculators.push_back(new KSTest());
242                                                 }else if (Estimators[i] == "sharednseqs") { 
243                                                         sumCalculators.push_back(new SharedNSeqs());
244                                                 }else if (Estimators[i] == "ochiai") { 
245                                                         sumCalculators.push_back(new Ochiai());
246                                                 }else if (Estimators[i] == "anderberg") { 
247                                                         sumCalculators.push_back(new Anderberg());
248                                                 }else if (Estimators[i] == "kulczynski") { 
249                                                         sumCalculators.push_back(new Kulczynski());
250                                                 }else if (Estimators[i] == "kulczynskicody") { 
251                                                         sumCalculators.push_back(new KulczynskiCody());
252                                                 }else if (Estimators[i] == "lennon") { 
253                                                         sumCalculators.push_back(new Lennon());
254                                                 }else if (Estimators[i] == "morisitahorn") { 
255                                                         sumCalculators.push_back(new MorHorn());
256                                                 }else if (Estimators[i] == "braycurtis") { 
257                                                         sumCalculators.push_back(new BrayCurtis());
258                                                 }else if (Estimators[i] == "whittaker") { 
259                                                         sumCalculators.push_back(new Whittaker());
260                                                 }else if (Estimators[i] == "odum") { 
261                                                         sumCalculators.push_back(new Odum());
262                                                 }else if (Estimators[i] == "canberra") { 
263                                                         sumCalculators.push_back(new Canberra());
264                                                 }else if (Estimators[i] == "structeuclidean") { 
265                                                         sumCalculators.push_back(new StructEuclidean());
266                                                 }else if (Estimators[i] == "structchord") { 
267                                                         sumCalculators.push_back(new StructChord());
268                                                 }else if (Estimators[i] == "hellinger") { 
269                                                         sumCalculators.push_back(new Hellinger());
270                                                 }else if (Estimators[i] == "manhattan") { 
271                                                         sumCalculators.push_back(new Manhattan());
272                                                 }else if (Estimators[i] == "structpearson") { 
273                                                         sumCalculators.push_back(new StructPearson());
274                                                 }else if (Estimators[i] == "soergel") { 
275                                                         sumCalculators.push_back(new Soergel());
276                                                 }else if (Estimators[i] == "spearman") { 
277                                                         sumCalculators.push_back(new Spearman());
278                                                 }else if (Estimators[i] == "structkulczynski") { 
279                                                         sumCalculators.push_back(new StructKulczynski());
280                                                 }else if (Estimators[i] == "speciesprofile") { 
281                                                         sumCalculators.push_back(new SpeciesProfile());
282                                                 }else if (Estimators[i] == "hamming") { 
283                                                         sumCalculators.push_back(new Hamming());
284                                                 }else if (Estimators[i] == "structchi2") { 
285                                                         sumCalculators.push_back(new StructChi2());
286                                                 }else if (Estimators[i] == "gower") { 
287                                                         sumCalculators.push_back(new Gower());
288                                                 }else if (Estimators[i] == "memchi2") { 
289                                                         sumCalculators.push_back(new MemChi2());
290                                                 }else if (Estimators[i] == "memchord") { 
291                                                         sumCalculators.push_back(new MemChord());
292                                                 }else if (Estimators[i] == "memeuclidean") { 
293                                                         sumCalculators.push_back(new MemEuclidean());
294                                                 }else if (Estimators[i] == "mempearson") { 
295                                                         sumCalculators.push_back(new MemPearson());
296                                                 }
297                                         }
298                                 }
299                                 
300                                 mult = false;
301                         }
302                 }
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
306                 exit(1);
307         }
308 }
309 //**********************************************************************************************************************
310
311 int SummarySharedCommand::execute(){
312         try {
313         
314                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
315                 
316                 ofstream outputFileHandle, outAll;
317         map<string, string> variables; 
318                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
319                 string outputFileName = getOutputFileName("summary",variables);
320                 
321                 //if the users entered no valid calculators don't execute command
322                 if (sumCalculators.size() == 0) { return 0; }
323                 //check if any calcs can do multiples
324                 else{
325                         if (all){ 
326                                 for (int i = 0; i < sumCalculators.size(); i++) {
327                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
328                                 }
329                         }
330                 }
331                         
332                 input = new InputData(sharedfile, "sharedfile");
333                 lookup = input->getSharedRAbundVectors();
334                 string lastLabel = lookup[0]->getLabel();
335         
336                 /******************************************************/
337                 //output headings for files
338                 /******************************************************/
339                 //output estimator names as column headers
340                 m->openOutputFile(outputFileName, outputFileHandle);
341                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
342                 for(int i=0;i<sumCalculators.size();i++){
343                         outputFileHandle << '\t' << sumCalculators[i]->getName();
344                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
345                 }
346                 outputFileHandle << endl;
347                 outputFileHandle.close();
348                 
349                 //create file and put column headers for multiple groups file
350         variables["[tag]"]= "multiple";
351                 string outAllFileName = getOutputFileName("summary",variables);
352                 if (mult == true) {
353                         m->openOutputFile(outAllFileName, outAll);
354                         outputNames.push_back(outAllFileName);
355                         
356                         outAll << "label" <<'\t' << "comparison" << '\t'; 
357                         for(int i=0;i<sumCalculators.size();i++){
358                                 if (sumCalculators[i]->getMultiple() == true) { 
359                                         outAll << '\t' << sumCalculators[i]->getName();
360                                 }
361                         }
362                         outAll << endl;
363                         outAll.close();
364                 }
365                 
366                 if (lookup.size() < 2) { 
367                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
368                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
369                         
370                         //close files and clean up
371                         m->mothurRemove(outputFileName);
372                         if (mult == true) { m->mothurRemove(outAllFileName);  }
373                         return 0;
374                 //if you only have 2 groups you don't need a .sharedmultiple file
375                 }else if ((lookup.size() == 2) && (mult == true)) { 
376                         mult = false;
377                         m->mothurRemove(outAllFileName);
378                         outputNames.pop_back();
379                 }
380                 
381                 if (m->control_pressed) {
382                         if (mult) {  m->mothurRemove(outAllFileName);  }
383                         m->mothurRemove(outputFileName); 
384                         delete input;
385                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
387                         m->clearGroups(); 
388                         return 0;
389                 }
390                 /******************************************************/
391         if (subsample) { 
392             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393                 subsampleSize = lookup[0]->getNumSeqs();
394                 for (int i = 1; i < lookup.size(); i++) {
395                     int thisSize = lookup[i]->getNumSeqs();
396                     
397                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
398                 }
399             }else {
400                 m->clearGroups();
401                 Groups.clear();
402                 vector<SharedRAbundVector*> temp;
403                 for (int i = 0; i < lookup.size(); i++) {
404                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
405                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
406                         delete lookup[i];
407                     }else { 
408                         Groups.push_back(lookup[i]->getGroup()); 
409                         temp.push_back(lookup[i]);
410                     }
411                 } 
412                 lookup = temp;
413                 m->setGroups(Groups);
414             }
415             
416             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
417         }
418
419                 
420                 /******************************************************/
421                 //comparison breakup to be used by different processes later
422                 numGroups = lookup.size();
423                 lines.resize(processors);
424                 for (int i = 0; i < processors; i++) {
425                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
427                 }               
428                 /******************************************************/
429                 
430                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431                 set<string> processedLabels;
432                 set<string> userLabels = labels;
433                         
434                 //as long as you are not at the end of the file or done wih the lines you want
435                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436                         if (m->control_pressed) {
437                                 if (mult) {  m->mothurRemove(outAllFileName);  }
438                                 m->mothurRemove(outputFileName); 
439                                 delete input; 
440                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
442                                 m->clearGroups(); 
443                                 return 0;
444                         }
445
446                 
447                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
448                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449                                 process(lookup, outputFileName, outAllFileName);
450                                 
451                                 processedLabels.insert(lookup[0]->getLabel());
452                                 userLabels.erase(lookup[0]->getLabel());
453                         }
454                         
455                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456                                         string saveLabel = lookup[0]->getLabel();
457                                         
458                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                                         lookup = input->getSharedRAbundVectors(lastLabel);
460
461                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462                                         process(lookup, outputFileName, outAllFileName);
463                                         
464                                         processedLabels.insert(lookup[0]->getLabel());
465                                         userLabels.erase(lookup[0]->getLabel());
466                                         
467                                         //restore real lastlabel to save below
468                                         lookup[0]->setLabel(saveLabel);
469                         }
470                         
471                         lastLabel = lookup[0]->getLabel();                      
472                                 
473                         //get next line to process
474                         //prevent memory leak
475                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
476                         lookup = input->getSharedRAbundVectors();
477                 }
478                 
479                 if (m->control_pressed) {
480                         if (mult) { m->mothurRemove(outAllFileName);  }
481                         m->mothurRemove(outputFileName); 
482                         delete input; 
483                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
484                         m->clearGroups(); 
485                         return 0;
486                 }
487
488                 //output error messages about any remaining user labels
489                 set<string>::iterator it;
490                 bool needToRun = false;
491                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
492                         m->mothurOut("Your file does not include the label " + *it); 
493                         if (processedLabels.count(lastLabel) != 1) {
494                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495                                 needToRun = true;
496                         }else {
497                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
498                         }
499                 }
500                 
501                 //run last label if you need to
502                 if (needToRun == true)  {
503                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
504                                 lookup = input->getSharedRAbundVectors(lastLabel);
505
506                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507                                 process(lookup, outputFileName, outAllFileName);
508                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
509                 }
510                 
511                                 
512                 //reset groups parameter
513                 m->clearGroups();  
514                 
515                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
516                 delete input;  
517                 
518                 if (m->control_pressed) {
519                         m->mothurRemove(outAllFileName);  
520                         m->mothurRemove(outputFileName); 
521                         return 0;
522                 }
523                 
524                 m->mothurOutEndLine();
525                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
527                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
528                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
529                 m->mothurOutEndLine();
530
531                 return 0;
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "SummarySharedCommand", "execute");
535                 exit(1);
536         }
537 }
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
540         try {
541                 
542                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543                 
544                 //output num seqs
545                 out << simMatrix.size() << endl;
546                 
547                 if (output == "lt") {
548                         for (int b = 0; b < simMatrix.size(); b++)      {
549                                 out << lookup[b]->getGroup() << '\t';
550                                 for (int n = 0; n < b; n++)     {
551                     if (m->control_pressed) { return 0; }
552                                         out << simMatrix[b][n] << '\t'; 
553                                 }
554                                 out << endl;
555                         }
556                 }else{
557                         for (int b = 0; b < simMatrix.size(); m++)      {
558                                 out << lookup[b]->getGroup() << '\t';
559                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
560                     if (m->control_pressed) { return 0; }
561                                         out << simMatrix[b][n] << '\t'; 
562                                 }
563                                 out << endl;
564                         }
565                 }
566         
567         return 0;
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "SummarySharedCommand", "printSims");
571                 exit(1);
572         }
573 }
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
576         try {
577         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
579         
580         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
581             
582             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
583             
584             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
585                 SubSample sample;
586                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
587                 
588                 //make copy of lookup so we don't get access violations
589                 vector<SharedRAbundVector*> newLookup;
590                 for (int k = 0; k < thisItersLookup.size(); k++) {
591                     SharedRAbundVector* temp = new SharedRAbundVector();
592                     temp->setLabel(thisItersLookup[k]->getLabel());
593                     temp->setGroup(thisItersLookup[k]->getGroup());
594                     newLookup.push_back(temp);
595                 }
596                 
597                 //for each bin
598                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
600                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
601                 }
602                 
603                 tempLabels = sample.getSample(newLookup, subsampleSize);
604                 thisItersLookup = newLookup;
605             }
606         
607             
608             if(processors == 1){
609                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610                 m->appendFiles((sumFileName + ".temp"), sumFileName);
611                 m->mothurRemove((sumFileName + ".temp"));
612                 if (mult) {
613                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614                     m->mothurRemove((sumAllFileName + ".temp"));
615                 }
616             }else{
617                 
618                 int process = 1;
619                 vector<int> processIDS;
620                 
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                 //loop through and create all the processes you want
623                 while (process != processors) {
624                     int pid = fork();
625                     
626                     if (pid > 0) {
627                         processIDS.push_back(pid); 
628                         process++;
629                     }else if (pid == 0){
630                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
631                         
632                         //only do this if you want a distance file
633                         if (createPhylip) {
634                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
635                             ofstream outtemp;
636                             m->openOutputFile(tempdistFileName, outtemp);
637                             
638                             for (int i = 0; i < calcDists.size(); i++) {
639                                 outtemp << calcDists[i].size() << endl;
640                                 
641                                 for (int j = 0; j < calcDists[i].size(); j++) {
642                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
643                                 }
644                             }
645                             outtemp.close();
646                         }
647                         
648                         exit(0);
649                     }else { 
650                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
651                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
652                         exit(0);
653                     }
654                 }
655                 
656                 //parent do your part
657                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
658                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
661                 
662                 //force parent to wait until all the processes are done
663                 for (int i = 0; i < processIDS.size(); i++) {
664                     int temp = processIDS[i];
665                     wait(&temp);
666                 }
667                 
668                 for (int i = 0; i < processIDS.size(); i++) {
669                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
672                     
673                     if (createPhylip) {
674                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
675                         ifstream intemp;
676                         m->openInputFile(tempdistFileName, intemp);
677                         
678                         for (int k = 0; k < calcDists.size(); k++) {
679                             int size = 0;
680                             intemp >> size; m->gobble(intemp);
681                             
682                             for (int j = 0; j < size; j++) {
683                                 int seq1 = 0;
684                                 int seq2 = 0;
685                                 float dist = 1.0;
686                                 
687                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
688                                 
689                                 seqDist tempDist(seq1, seq2, dist);
690                                 calcDists[k].push_back(tempDist);
691                             }
692                         }
693                         intemp.close();
694                         m->mothurRemove(tempdistFileName);
695                     }
696                 }
697 #else
698                 //////////////////////////////////////////////////////////////////////////////////////////////////////
699                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
700                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
701                 //Taking advantage of shared memory to pass results vectors.
702                 //////////////////////////////////////////////////////////////////////////////////////////////////////
703                 
704                 vector<summarySharedData*> pDataArray; 
705                 DWORD   dwThreadIdArray[processors-1];
706                 HANDLE  hThreadArray[processors-1]; 
707                 
708                 //Create processor worker threads.
709                 for( int i=1; i<processors; i++ ){
710                     
711                     //make copy of lookup so we don't get access violations
712                     vector<SharedRAbundVector*> newLookup;
713                     for (int k = 0; k < thisLookup.size(); k++) {
714                         SharedRAbundVector* temp = new SharedRAbundVector();
715                         temp->setLabel(thisLookup[k]->getLabel());
716                         temp->setGroup(thisLookup[k]->getGroup());
717                         newLookup.push_back(temp);
718                     }
719                     
720                     //for each bin
721                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
722                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
723                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
724                     }
725                     
726                     // Allocate memory for thread data.
727                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
728                     pDataArray.push_back(tempSum);
729                     processIDS.push_back(i);
730                     
731                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
732                 }
733                 
734                 //parent do your part
735                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
736                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
737                 m->mothurRemove((sumFileName + "0.temp"));
738                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
739                 
740                 //Wait until all threads have terminated.
741                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
742                 
743                 //Close all thread handles and free memory allocations.
744                 for(int i=0; i < pDataArray.size(); i++){
745                     if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
746                         m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
747                     }
748                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
749                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
750                     
751                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
752                     
753                     if (createPhylip) {
754                         for (int k = 0; k < calcDists.size(); k++) {
755                             int size = pDataArray[i]->calcDists[k].size();
756                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
757                         }
758                     }
759                     
760                     CloseHandle(hThreadArray[i]);
761                     delete pDataArray[i];
762                 }
763                 
764 #endif
765             }
766             
767             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
768                 
769                 calcDistsTotals.push_back(calcDists); 
770                 //clean up memory
771                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
772                 thisItersLookup.clear();
773             }else {
774                 if (createPhylip) {
775                     for (int i = 0; i < calcDists.size(); i++) {
776                         if (m->control_pressed) { break; }
777                         
778                         //initialize matrix
779                         vector< vector<double> > matrix; //square matrix to represent the distance
780                         matrix.resize(thisLookup.size());
781                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
782                         
783                         for (int j = 0; j < calcDists[i].size(); j++) {
784                             int row = calcDists[i][j].seq1;
785                             int column = calcDists[i][j].seq2;
786                             double dist = calcDists[i][j].dist;
787                             
788                             matrix[row][column] = dist;
789                             matrix[column][row] = dist;
790                         }
791                         
792                         map<string, string> variables; 
793                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
794                         variables["[calc]"] = sumCalculators[i]->getName();
795                         variables["[distance]"] = thisLookup[0]->getLabel();
796                         variables["[outputtag]"] = output;
797                         variables["[tag2]"] = "";
798                         string distFileName = getOutputFileName("phylip",variables);
799                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
800                         ofstream outDist;
801                         m->openOutputFile(distFileName, outDist);
802                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
803                         
804                         printSims(outDist, matrix);
805                         
806                         outDist.close();
807                     }
808                 }
809             }
810             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
811                 }
812
813         if (iters != 0) {
814             //we need to find the average distance and standard deviation for each groups distance
815             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
816             
817             //find standard deviation
818             vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
819             
820             //print results
821             for (int i = 0; i < calcDists.size(); i++) {
822                 vector< vector<double> > matrix; //square matrix to represent the distance
823                 matrix.resize(thisLookup.size());
824                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
825                 
826                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
827                 stdmatrix.resize(thisLookup.size());
828                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
829                 
830                 
831                 for (int j = 0; j < calcAverages[i].size(); j++) {
832                     int row = calcAverages[i][j].seq1;
833                     int column = calcAverages[i][j].seq2;
834                     float dist = calcAverages[i][j].dist;
835                     float stdDist = stdDev[i][j].dist;
836                     
837                     matrix[row][column] = dist;
838                     matrix[column][row] = dist;
839                     stdmatrix[row][column] = stdDist;
840                     stdmatrix[column][row] = stdDist;
841                 }
842                 
843                 map<string, string> variables; 
844                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
845                 variables["[calc]"] = sumCalculators[i]->getName();
846                 variables["[distance]"] = thisLookup[0]->getLabel();
847                 variables["[outputtag]"] = output;
848                 variables["[tag2]"] = "ave";
849                 string distFileName = getOutputFileName("phylip",variables);
850                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
851                 ofstream outAve;
852                 m->openOutputFile(distFileName, outAve);
853                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
854                 
855                 printSims(outAve, matrix);
856                 
857                 outAve.close();
858                 
859                 variables["[tag2]"] = "std";
860                 distFileName = getOutputFileName("phylip",variables);
861                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
862                 ofstream outSTD;
863                 m->openOutputFile(distFileName, outSTD);
864                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
865                 
866                 printSims(outSTD, stdmatrix);
867                 
868                 outSTD.close();
869                 
870             }
871         }
872         
873         return 0;
874         }
875         catch(exception& e) {
876                 m->errorOut(e, "SummarySharedCommand", "process");
877                 exit(1);
878         }
879 }
880 /**************************************************************************************************/
881 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
882         try {
883                 
884                 //loop through calculators and add to file all for all calcs that can do mutiple groups
885                 if (mult == true) {
886                         ofstream outAll;
887                         m->openOutputFile(sumAllFile, outAll);
888                         
889                         //output label
890                         outAll << thisLookup[0]->getLabel() << '\t';
891                         
892                         //output groups names
893                         string outNames = "";
894                         for (int j = 0; j < thisLookup.size(); j++) {
895                                 outNames += thisLookup[j]->getGroup() +  "-";
896                         }
897                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
898                         outAll << outNames << '\t';
899                         
900                         for(int i=0;i<sumCalculators.size();i++){
901                                 if (sumCalculators[i]->getMultiple() == true) { 
902                                         sumCalculators[i]->getValues(thisLookup);
903                                         
904                                         if (m->control_pressed) { outAll.close(); return 1; }
905                                         
906                                         outAll << '\t';
907                                         sumCalculators[i]->print(outAll);
908                                 }
909                         }
910                         outAll << endl;
911                         outAll.close();
912                 }
913                 
914                 ofstream outputFileHandle;
915                 m->openOutputFile(sumFile, outputFileHandle);
916                 
917                 vector<SharedRAbundVector*> subset;
918                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
919
920                         for (int l = 0; l < k; l++) {
921                                 
922                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
923                                 
924                                 subset.clear(); //clear out old pair of sharedrabunds
925                                 //add new pair of sharedrabunds
926                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
927                                 
928                                 //sort groups to be alphanumeric
929                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
930                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
931                                 }else{
932                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
933                                 }
934                                 
935                                 for(int i=0;i<sumCalculators.size();i++) {
936                                         
937                                         //if this calc needs all groups to calculate the pair load all groups
938                                         if (sumCalculators[i]->getNeedsAll()) { 
939                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
940                                                 for (int w = 0; w < thisLookup.size(); w++) {
941                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
942                                                 }
943                                         }
944                                         
945                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
946                                         
947                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
948                                         
949                                         outputFileHandle << '\t';
950                                         sumCalculators[i]->print(outputFileHandle);
951                                         
952                                         seqDist temp(l, k, tempdata[0]);
953                                         calcDists[i].push_back(temp);
954                                 }
955                                 outputFileHandle << endl;
956                         }
957                 }
958                 
959                 outputFileHandle.close();
960                 
961                 return 0;
962         }
963         catch(exception& e) {
964                 m->errorOut(e, "SummarySharedCommand", "driver");
965                 exit(1);
966         }
967 }
968 /**************************************************************************************************/
969
970