]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
188                         createPhylip = m->isTrue(temp);
189                         
190             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
191                         m->mothurConvert(temp, iters); 
192             
193             output = validParameter.validFile(parameters, "output", false);             if(output == "not found"){      output = "lt"; }
194                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
195             
196             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
197                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
198             else {  
199                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
200                 else { subsample = false; }
201             }
202             
203             if (subsample == false) { iters = 0; }
204             
205                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
206                         m->setProcessors(temp);
207                         m->mothurConvert(temp, processors); 
208                         
209                         if (abort == false) {
210                         
211                                 ValidCalculators validCalculator;
212                                 int i;
213                                 
214                                 for (i=0; i<Estimators.size(); i++) {
215                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
216                                                 if (Estimators[i] == "sharedsobs") { 
217                                                         sumCalculators.push_back(new SharedSobsCS());
218                                                 }else if (Estimators[i] == "sharedchao") { 
219                                                         sumCalculators.push_back(new SharedChao1());
220                                                 }else if (Estimators[i] == "sharedace") { 
221                                                         sumCalculators.push_back(new SharedAce());
222                                                 }else if (Estimators[i] == "jabund") {  
223                                                         sumCalculators.push_back(new JAbund());
224                                                 }else if (Estimators[i] == "sorabund") { 
225                                                         sumCalculators.push_back(new SorAbund());
226                                                 }else if (Estimators[i] == "jclass") { 
227                                                         sumCalculators.push_back(new Jclass());
228                                                 }else if (Estimators[i] == "sorclass") { 
229                                                         sumCalculators.push_back(new SorClass());
230                                                 }else if (Estimators[i] == "jest") { 
231                                                         sumCalculators.push_back(new Jest());
232                                                 }else if (Estimators[i] == "sorest") { 
233                                                         sumCalculators.push_back(new SorEst());
234                                                 }else if (Estimators[i] == "thetayc") { 
235                                                         sumCalculators.push_back(new ThetaYC());
236                                                 }else if (Estimators[i] == "thetan") { 
237                                                         sumCalculators.push_back(new ThetaN());
238                                                 }else if (Estimators[i] == "kstest") { 
239                                                         sumCalculators.push_back(new KSTest());
240                                                 }else if (Estimators[i] == "sharednseqs") { 
241                                                         sumCalculators.push_back(new SharedNSeqs());
242                                                 }else if (Estimators[i] == "ochiai") { 
243                                                         sumCalculators.push_back(new Ochiai());
244                                                 }else if (Estimators[i] == "anderberg") { 
245                                                         sumCalculators.push_back(new Anderberg());
246                                                 }else if (Estimators[i] == "kulczynski") { 
247                                                         sumCalculators.push_back(new Kulczynski());
248                                                 }else if (Estimators[i] == "kulczynskicody") { 
249                                                         sumCalculators.push_back(new KulczynskiCody());
250                                                 }else if (Estimators[i] == "lennon") { 
251                                                         sumCalculators.push_back(new Lennon());
252                                                 }else if (Estimators[i] == "morisitahorn") { 
253                                                         sumCalculators.push_back(new MorHorn());
254                                                 }else if (Estimators[i] == "braycurtis") { 
255                                                         sumCalculators.push_back(new BrayCurtis());
256                                                 }else if (Estimators[i] == "whittaker") { 
257                                                         sumCalculators.push_back(new Whittaker());
258                                                 }else if (Estimators[i] == "odum") { 
259                                                         sumCalculators.push_back(new Odum());
260                                                 }else if (Estimators[i] == "canberra") { 
261                                                         sumCalculators.push_back(new Canberra());
262                                                 }else if (Estimators[i] == "structeuclidean") { 
263                                                         sumCalculators.push_back(new StructEuclidean());
264                                                 }else if (Estimators[i] == "structchord") { 
265                                                         sumCalculators.push_back(new StructChord());
266                                                 }else if (Estimators[i] == "hellinger") { 
267                                                         sumCalculators.push_back(new Hellinger());
268                                                 }else if (Estimators[i] == "manhattan") { 
269                                                         sumCalculators.push_back(new Manhattan());
270                                                 }else if (Estimators[i] == "structpearson") { 
271                                                         sumCalculators.push_back(new StructPearson());
272                                                 }else if (Estimators[i] == "soergel") { 
273                                                         sumCalculators.push_back(new Soergel());
274                                                 }else if (Estimators[i] == "spearman") { 
275                                                         sumCalculators.push_back(new Spearman());
276                                                 }else if (Estimators[i] == "structkulczynski") { 
277                                                         sumCalculators.push_back(new StructKulczynski());
278                                                 }else if (Estimators[i] == "speciesprofile") { 
279                                                         sumCalculators.push_back(new SpeciesProfile());
280                                                 }else if (Estimators[i] == "hamming") { 
281                                                         sumCalculators.push_back(new Hamming());
282                                                 }else if (Estimators[i] == "structchi2") { 
283                                                         sumCalculators.push_back(new StructChi2());
284                                                 }else if (Estimators[i] == "gower") { 
285                                                         sumCalculators.push_back(new Gower());
286                                                 }else if (Estimators[i] == "memchi2") { 
287                                                         sumCalculators.push_back(new MemChi2());
288                                                 }else if (Estimators[i] == "memchord") { 
289                                                         sumCalculators.push_back(new MemChord());
290                                                 }else if (Estimators[i] == "memeuclidean") { 
291                                                         sumCalculators.push_back(new MemEuclidean());
292                                                 }else if (Estimators[i] == "mempearson") { 
293                                                         sumCalculators.push_back(new MemPearson());
294                                                 }
295                                         }
296                                 }
297                                 
298                                 mult = false;
299                         }
300                 }
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
304                 exit(1);
305         }
306 }
307 //**********************************************************************************************************************
308
309 int SummarySharedCommand::execute(){
310         try {
311         
312                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
313                 
314                 ofstream outputFileHandle, outAll;
315         map<string, string> variables; 
316                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
317                 string outputFileName = getOutputFileName("summary",variables);
318                 
319                 //if the users entered no valid calculators don't execute command
320                 if (sumCalculators.size() == 0) { return 0; }
321                 //check if any calcs can do multiples
322                 else{
323                         if (all){ 
324                                 for (int i = 0; i < sumCalculators.size(); i++) {
325                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
326                                 }
327                         }
328                 }
329                         
330                 input = new InputData(sharedfile, "sharedfile");
331                 lookup = input->getSharedRAbundVectors();
332                 string lastLabel = lookup[0]->getLabel();
333         
334                 /******************************************************/
335                 //output headings for files
336                 /******************************************************/
337                 //output estimator names as column headers
338                 m->openOutputFile(outputFileName, outputFileHandle);
339                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
340                 for(int i=0;i<sumCalculators.size();i++){
341                         outputFileHandle << '\t' << sumCalculators[i]->getName();
342                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
343                 }
344                 outputFileHandle << endl;
345                 outputFileHandle.close();
346                 
347                 //create file and put column headers for multiple groups file
348         variables["[tag]"]= "multiple";
349                 string outAllFileName = getOutputFileName("summary",variables);
350                 if (mult == true) {
351                         m->openOutputFile(outAllFileName, outAll);
352                         outputNames.push_back(outAllFileName);
353                         
354                         outAll << "label" <<'\t' << "comparison" << '\t'; 
355                         for(int i=0;i<sumCalculators.size();i++){
356                                 if (sumCalculators[i]->getMultiple() == true) { 
357                                         outAll << '\t' << sumCalculators[i]->getName();
358                                 }
359                         }
360                         outAll << endl;
361                         outAll.close();
362                 }
363                 
364                 if (lookup.size() < 2) { 
365                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
366                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
367                         
368                         //close files and clean up
369                         m->mothurRemove(outputFileName);
370                         if (mult == true) { m->mothurRemove(outAllFileName);  }
371                         return 0;
372                 //if you only have 2 groups you don't need a .sharedmultiple file
373                 }else if ((lookup.size() == 2) && (mult == true)) { 
374                         mult = false;
375                         m->mothurRemove(outAllFileName);
376                         outputNames.pop_back();
377                 }
378                 
379                 if (m->control_pressed) {
380                         if (mult) {  m->mothurRemove(outAllFileName);  }
381                         m->mothurRemove(outputFileName); 
382                         delete input;
383                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
384                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
385                         m->clearGroups(); 
386                         return 0;
387                 }
388                 /******************************************************/
389         if (subsample) { 
390             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
391                 subsampleSize = lookup[0]->getNumSeqs();
392                 for (int i = 1; i < lookup.size(); i++) {
393                     int thisSize = lookup[i]->getNumSeqs();
394                     
395                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
396                 }
397             }else {
398                 m->clearGroups();
399                 Groups.clear();
400                 vector<SharedRAbundVector*> temp;
401                 for (int i = 0; i < lookup.size(); i++) {
402                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
403                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
404                         delete lookup[i];
405                     }else { 
406                         Groups.push_back(lookup[i]->getGroup()); 
407                         temp.push_back(lookup[i]);
408                     }
409                 } 
410                 lookup = temp;
411                 m->setGroups(Groups);
412             }
413             
414             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
415         }
416
417                 
418                 /******************************************************/
419                 //comparison breakup to be used by different processes later
420                 numGroups = lookup.size();
421                 lines.resize(processors);
422                 for (int i = 0; i < processors; i++) {
423                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
424                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
425                 }               
426                 /******************************************************/
427                 
428                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
429                 set<string> processedLabels;
430                 set<string> userLabels = labels;
431                         
432                 //as long as you are not at the end of the file or done wih the lines you want
433                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
434                         if (m->control_pressed) {
435                                 if (mult) {  m->mothurRemove(outAllFileName);  }
436                                 m->mothurRemove(outputFileName); 
437                                 delete input; 
438                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
439                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
440                                 m->clearGroups(); 
441                                 return 0;
442                         }
443
444                 
445                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
446                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
447                                 process(lookup, outputFileName, outAllFileName);
448                                 
449                                 processedLabels.insert(lookup[0]->getLabel());
450                                 userLabels.erase(lookup[0]->getLabel());
451                         }
452                         
453                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
454                                         string saveLabel = lookup[0]->getLabel();
455                                         
456                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
457                                         lookup = input->getSharedRAbundVectors(lastLabel);
458
459                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
460                                         process(lookup, outputFileName, outAllFileName);
461                                         
462                                         processedLabels.insert(lookup[0]->getLabel());
463                                         userLabels.erase(lookup[0]->getLabel());
464                                         
465                                         //restore real lastlabel to save below
466                                         lookup[0]->setLabel(saveLabel);
467                         }
468                         
469                         lastLabel = lookup[0]->getLabel();                      
470                                 
471                         //get next line to process
472                         //prevent memory leak
473                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
474                         lookup = input->getSharedRAbundVectors();
475                 }
476                 
477                 if (m->control_pressed) {
478                         if (mult) { m->mothurRemove(outAllFileName);  }
479                         m->mothurRemove(outputFileName); 
480                         delete input; 
481                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
482                         m->clearGroups(); 
483                         return 0;
484                 }
485
486                 //output error messages about any remaining user labels
487                 set<string>::iterator it;
488                 bool needToRun = false;
489                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
490                         m->mothurOut("Your file does not include the label " + *it); 
491                         if (processedLabels.count(lastLabel) != 1) {
492                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
493                                 needToRun = true;
494                         }else {
495                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
496                         }
497                 }
498                 
499                 //run last label if you need to
500                 if (needToRun == true)  {
501                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
502                                 lookup = input->getSharedRAbundVectors(lastLabel);
503
504                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
505                                 process(lookup, outputFileName, outAllFileName);
506                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
507                 }
508                 
509                                 
510                 //reset groups parameter
511                 m->clearGroups();  
512                 
513                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
514                 delete input;  
515                 
516                 if (m->control_pressed) {
517                         m->mothurRemove(outAllFileName);  
518                         m->mothurRemove(outputFileName); 
519                         return 0;
520                 }
521                 
522                 m->mothurOutEndLine();
523                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
524                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
525                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
526                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
527                 m->mothurOutEndLine();
528
529                 return 0;
530         }
531         catch(exception& e) {
532                 m->errorOut(e, "SummarySharedCommand", "execute");
533                 exit(1);
534         }
535 }
536 /***********************************************************/
537 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
538         try {
539                 
540                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
541                 
542                 //output num seqs
543                 out << simMatrix.size() << endl;
544                 
545                 if (output == "lt") {
546                         for (int b = 0; b < simMatrix.size(); b++)      {
547                                 out << lookup[b]->getGroup() << '\t';
548                                 for (int n = 0; n < b; n++)     {
549                     if (m->control_pressed) { return 0; }
550                                         out << simMatrix[b][n] << '\t'; 
551                                 }
552                                 out << endl;
553                         }
554                 }else{
555                         for (int b = 0; b < simMatrix.size(); m++)      {
556                                 out << lookup[b]->getGroup() << '\t';
557                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
558                     if (m->control_pressed) { return 0; }
559                                         out << simMatrix[b][n] << '\t'; 
560                                 }
561                                 out << endl;
562                         }
563                 }
564         
565         return 0;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "SummarySharedCommand", "printSims");
569                 exit(1);
570         }
571 }
572 /***********************************************************/
573 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
574         try {
575         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
576         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
577         
578         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
579             
580             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
581             
582             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
583                 SubSample sample;
584                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
585                 
586                 //make copy of lookup so we don't get access violations
587                 vector<SharedRAbundVector*> newLookup;
588                 for (int k = 0; k < thisItersLookup.size(); k++) {
589                     SharedRAbundVector* temp = new SharedRAbundVector();
590                     temp->setLabel(thisItersLookup[k]->getLabel());
591                     temp->setGroup(thisItersLookup[k]->getGroup());
592                     newLookup.push_back(temp);
593                 }
594                 
595                 //for each bin
596                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
597                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
598                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
599                 }
600                 
601                 tempLabels = sample.getSample(newLookup, subsampleSize);
602                 thisItersLookup = newLookup;
603             }
604         
605             
606             if(processors == 1){
607                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
608                 m->appendFiles((sumFileName + ".temp"), sumFileName);
609                 m->mothurRemove((sumFileName + ".temp"));
610                 if (mult) {
611                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
612                     m->mothurRemove((sumAllFileName + ".temp"));
613                 }
614             }else{
615                 
616                 int process = 1;
617                 vector<int> processIDS;
618                 
619 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
620                 //loop through and create all the processes you want
621                 while (process != processors) {
622                     int pid = fork();
623                     
624                     if (pid > 0) {
625                         processIDS.push_back(pid); 
626                         process++;
627                     }else if (pid == 0){
628                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
629                         
630                         //only do this if you want a distance file
631                         if (createPhylip) {
632                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
633                             ofstream outtemp;
634                             m->openOutputFile(tempdistFileName, outtemp);
635                             
636                             for (int i = 0; i < calcDists.size(); i++) {
637                                 outtemp << calcDists[i].size() << endl;
638                                 
639                                 for (int j = 0; j < calcDists[i].size(); j++) {
640                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
641                                 }
642                             }
643                             outtemp.close();
644                         }
645                         
646                         exit(0);
647                     }else { 
648                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
649                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
650                         exit(0);
651                     }
652                 }
653                 
654                 //parent do your part
655                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
656                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
657                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
658                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
659                 
660                 //force parent to wait until all the processes are done
661                 for (int i = 0; i < processIDS.size(); i++) {
662                     int temp = processIDS[i];
663                     wait(&temp);
664                 }
665                 
666                 for (int i = 0; i < processIDS.size(); i++) {
667                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
668                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
669                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
670                     
671                     if (createPhylip) {
672                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
673                         ifstream intemp;
674                         m->openInputFile(tempdistFileName, intemp);
675                         
676                         for (int k = 0; k < calcDists.size(); k++) {
677                             int size = 0;
678                             intemp >> size; m->gobble(intemp);
679                             
680                             for (int j = 0; j < size; j++) {
681                                 int seq1 = 0;
682                                 int seq2 = 0;
683                                 float dist = 1.0;
684                                 
685                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
686                                 
687                                 seqDist tempDist(seq1, seq2, dist);
688                                 calcDists[k].push_back(tempDist);
689                             }
690                         }
691                         intemp.close();
692                         m->mothurRemove(tempdistFileName);
693                     }
694                 }
695 #else
696                 //////////////////////////////////////////////////////////////////////////////////////////////////////
697                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
698                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
699                 //Taking advantage of shared memory to pass results vectors.
700                 //////////////////////////////////////////////////////////////////////////////////////////////////////
701                 
702                 vector<summarySharedData*> pDataArray; 
703                 DWORD   dwThreadIdArray[processors-1];
704                 HANDLE  hThreadArray[processors-1]; 
705                 
706                 //Create processor worker threads.
707                 for( int i=1; i<processors; i++ ){
708                     
709                     //make copy of lookup so we don't get access violations
710                     vector<SharedRAbundVector*> newLookup;
711                     for (int k = 0; k < thisLookup.size(); k++) {
712                         SharedRAbundVector* temp = new SharedRAbundVector();
713                         temp->setLabel(thisLookup[k]->getLabel());
714                         temp->setGroup(thisLookup[k]->getGroup());
715                         newLookup.push_back(temp);
716                     }
717                     
718                     //for each bin
719                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
720                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
721                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
722                     }
723                     
724                     // Allocate memory for thread data.
725                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
726                     pDataArray.push_back(tempSum);
727                     processIDS.push_back(i);
728                     
729                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
730                 }
731                 
732                 //parent do your part
733                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
734                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
735                 m->mothurRemove((sumFileName + "0.temp"));
736                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
737                 
738                 //Wait until all threads have terminated.
739                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
740                 
741                 //Close all thread handles and free memory allocations.
742                 for(int i=0; i < pDataArray.size(); i++){
743                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
744                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
745                     
746                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
747                     
748                     if (createPhylip) {
749                         for (int k = 0; k < calcDists.size(); k++) {
750                             int size = pDataArray[i]->calcDists[k].size();
751                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
752                         }
753                     }
754                     
755                     CloseHandle(hThreadArray[i]);
756                     delete pDataArray[i];
757                 }
758                 
759 #endif
760             }
761             
762             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
763                 
764                 calcDistsTotals.push_back(calcDists); 
765                 //clean up memory
766                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
767                 thisItersLookup.clear();
768             }else {
769                 if (createPhylip) {
770                     for (int i = 0; i < calcDists.size(); i++) {
771                         if (m->control_pressed) { break; }
772                         
773                         //initialize matrix
774                         vector< vector<double> > matrix; //square matrix to represent the distance
775                         matrix.resize(thisLookup.size());
776                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
777                         
778                         for (int j = 0; j < calcDists[i].size(); j++) {
779                             int row = calcDists[i][j].seq1;
780                             int column = calcDists[i][j].seq2;
781                             double dist = calcDists[i][j].dist;
782                             
783                             matrix[row][column] = dist;
784                             matrix[column][row] = dist;
785                         }
786                         
787                         map<string, string> variables; 
788                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
789                         variables["[calc]"] = sumCalculators[i]->getName();
790                         variables["[distance]"] = thisLookup[0]->getLabel();
791                         variables["[outputtag]"] = output;
792                         string distFileName = getOutputFileName("phylip",variables);
793                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
794                         ofstream outDist;
795                         m->openOutputFile(distFileName, outDist);
796                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
797                         
798                         printSims(outDist, matrix);
799                         
800                         outDist.close();
801                     }
802                 }
803             }
804             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
805                 }
806
807         if (iters != 1) {
808             //we need to find the average distance and standard deviation for each groups distance
809             
810             vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
811             for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
812                 calcAverages[i].resize(calcDistsTotals[0][i].size());
813                 
814                 for (int j = 0; j < calcAverages[i].size(); j++) {
815                     calcAverages[i][j].seq1 = calcDists[i][j].seq1;
816                     calcAverages[i][j].seq2 = calcDists[i][j].seq2;
817                     calcAverages[i][j].dist = 0.0;
818                 }
819             }
820             
821             for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
822                 for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
823                     for (int j = 0; j < calcAverages[i].size(); j++) {
824                         calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
825                     }
826                 }
827             }
828             
829             for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
830                 for (int j = 0; j < calcAverages[i].size(); j++) {
831                     calcAverages[i][j].dist /= (float) iters;
832                 }
833             }
834             
835             //find standard deviation
836             vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
837             for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
838                 stdDev[i].resize(calcDistsTotals[0][i].size());
839                 
840                 for (int j = 0; j < stdDev[i].size(); j++) {
841                     stdDev[i][j].seq1 = calcDists[i][j].seq1;
842                     stdDev[i][j].seq2 = calcDists[i][j].seq2;
843                     stdDev[i][j].dist = 0.0;
844                 }
845             }
846             
847             for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
848                 for (int i = 0; i < stdDev.size(); i++) {  
849                     for (int j = 0; j < stdDev[i].size(); j++) {
850                         stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
851                     }
852                 }
853             }
854             
855             for (int i = 0; i < stdDev.size(); i++) {  //finds average.
856                 for (int j = 0; j < stdDev[i].size(); j++) {
857                     stdDev[i][j].dist /= (float) iters;
858                     stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
859                 }
860             }
861             
862             //print results
863             for (int i = 0; i < calcDists.size(); i++) {
864                 vector< vector<double> > matrix; //square matrix to represent the distance
865                 matrix.resize(thisLookup.size());
866                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
867                 
868                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
869                 stdmatrix.resize(thisLookup.size());
870                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
871                 
872                 
873                 for (int j = 0; j < calcAverages[i].size(); j++) {
874                     int row = calcAverages[i][j].seq1;
875                     int column = calcAverages[i][j].seq2;
876                     float dist = calcAverages[i][j].dist;
877                     float stdDist = stdDev[i][j].dist;
878                     
879                     matrix[row][column] = dist;
880                     matrix[column][row] = dist;
881                     stdmatrix[row][column] = stdDist;
882                     stdmatrix[column][row] = stdDist;
883                 }
884                 
885                 map<string, string> variables; 
886                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
887                 variables["[calc]"] = sumCalculators[i]->getName();
888                 variables["[distance]"] = thisLookup[0]->getLabel();
889                 variables["[outputtag]"] = output;
890                 variables["[tag2]"] = "ave";
891                 string distFileName = getOutputFileName("phylip",variables);
892                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
893                 ofstream outAve;
894                 m->openOutputFile(distFileName, outAve);
895                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
896                 
897                 printSims(outAve, matrix);
898                 
899                 outAve.close();
900                 
901                 variables["[tag2]"] = "std";
902                 distFileName = getOutputFileName("phylip",variables);
903                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
904                 ofstream outSTD;
905                 m->openOutputFile(distFileName, outSTD);
906                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
907                 
908                 printSims(outSTD, stdmatrix);
909                 
910                 outSTD.close();
911                 
912             }
913         }
914         
915         return 0;
916         }
917         catch(exception& e) {
918                 m->errorOut(e, "SummarySharedCommand", "process");
919                 exit(1);
920         }
921 }
922 /**************************************************************************************************/
923 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
924         try {
925                 
926                 //loop through calculators and add to file all for all calcs that can do mutiple groups
927                 if (mult == true) {
928                         ofstream outAll;
929                         m->openOutputFile(sumAllFile, outAll);
930                         
931                         //output label
932                         outAll << thisLookup[0]->getLabel() << '\t';
933                         
934                         //output groups names
935                         string outNames = "";
936                         for (int j = 0; j < thisLookup.size(); j++) {
937                                 outNames += thisLookup[j]->getGroup() +  "-";
938                         }
939                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
940                         outAll << outNames << '\t';
941                         
942                         for(int i=0;i<sumCalculators.size();i++){
943                                 if (sumCalculators[i]->getMultiple() == true) { 
944                                         sumCalculators[i]->getValues(thisLookup);
945                                         
946                                         if (m->control_pressed) { outAll.close(); return 1; }
947                                         
948                                         outAll << '\t';
949                                         sumCalculators[i]->print(outAll);
950                                 }
951                         }
952                         outAll << endl;
953                         outAll.close();
954                 }
955                 
956                 ofstream outputFileHandle;
957                 m->openOutputFile(sumFile, outputFileHandle);
958                 
959                 vector<SharedRAbundVector*> subset;
960                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
961
962                         for (int l = 0; l < k; l++) {
963                                 
964                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
965                                 
966                                 subset.clear(); //clear out old pair of sharedrabunds
967                                 //add new pair of sharedrabunds
968                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
969                                 
970                                 //sort groups to be alphanumeric
971                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
972                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
973                                 }else{
974                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
975                                 }
976                                 
977                                 for(int i=0;i<sumCalculators.size();i++) {
978                                         
979                                         //if this calc needs all groups to calculate the pair load all groups
980                                         if (sumCalculators[i]->getNeedsAll()) { 
981                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
982                                                 for (int w = 0; w < thisLookup.size(); w++) {
983                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
984                                                 }
985                                         }
986                                         
987                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
988                                         
989                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
990                                         
991                                         outputFileHandle << '\t';
992                                         sumCalculators[i]->print(outputFileHandle);
993                                         
994                                         seqDist temp(l, k, tempdata[0]);
995                                         calcDists[i].push_back(temp);
996                                 }
997                                 outputFileHandle << endl;
998                         }
999                 }
1000                 
1001                 outputFileHandle.close();
1002                 
1003                 return 0;
1004         }
1005         catch(exception& e) {
1006                 m->errorOut(e, "SummarySharedCommand", "driver");
1007                 exit(1);
1008         }
1009 }
1010 /**************************************************************************************************/
1011
1012