]> git.donarmstrong.com Git - mothur.git/blob - summarysharedcommand.cpp
added modify names parameter to set.dir
[mothur.git] / summarysharedcommand.cpp
1 /*
2  *  summarysharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/2/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "summarysharedcommand.h"
11 #include "subsample.h"
12
13 //**********************************************************************************************************************
14 vector<string> SummarySharedCommand::setParameters(){   
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
19                 CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
20                 CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
21         CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
22                 CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
23         CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "SummarySharedCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string SummarySharedCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 ValidCalculators validCalculator;
43                 helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
44                 helpString += "The summary.shared command should be in the following format: \n";
45                 helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
46                 helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
47                 helpString +=  validCalculator.printCalc("sharedsummary");
48         helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
49         helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
50         helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
51                 helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
52                 helpString += "The default value for groups is all the groups in your groupfile.\n";
53                 helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
54                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
55                 helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
56                 helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
57                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
58                 helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SummarySharedCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SummarySharedCommand::getOutputPattern(string type) {
68     try {
69         string pattern = "";
70         
71         if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
72         else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SummarySharedCommand::SummarySharedCommand(){   
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["summary"] = tempOutNames;
89         outputTypes["phylip"] = tempOutNames;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
93                 exit(1);
94         }
95 }
96 //**********************************************************************************************************************
97
98 SummarySharedCommand::SummarySharedCommand(string option)  {
99         try {
100                 abort = false; calledHelp = false;   
101                 allLines = 1;
102                                 
103                 //allow user to run help
104                 if(option == "help") {  help(); abort = true; calledHelp = true; }
105                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
106                 
107                 else {
108                         vector<string> myArray = setParameters();
109                         
110                         OptionParser parser(option);
111                         map<string, string> parameters = parser.getParameters();
112                         map<string, string>::iterator it;
113                         
114                         ValidParameters validParameter;
115                 
116                         //check to make sure all parameters are valid for command
117                         for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
118                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
119                         }
120                         
121                         //initialize outputTypes
122                         vector<string> tempOutNames;
123                         outputTypes["summary"] = tempOutNames;
124             outputTypes["phylip"] = tempOutNames;
125                         
126                         //if the user changes the input directory command factory will send this info to us in the output parameter 
127                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
128                         if (inputDir == "not found"){   inputDir = "";          }
129                         else {
130                                 string path;
131                                 it = parameters.find("shared");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
137                                 }
138                         }
139                         
140                         //get shared file
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { 
144                                 //if there is a current shared file, use it
145                                 sharedfile = m->getSharedFile(); 
146                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setSharedFile(sharedfile); }
149                         
150                         
151                         //if the user changes the output directory command factory will send this info to us in the output parameter 
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                                         
165                         calc = validParameter.validFile(parameters, "calc", false);                     
166                         if (calc == "not found") { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
167                         else { 
168                                  if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
169                         }
170                         m->splitAtDash(calc, Estimators);
171                         if (m->inUsersGroups("citation", Estimators)) { 
172                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
173                                 //remove citation from list of calcs
174                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; }
179                         else { 
180                                 m->splitAtDash(groups, Groups);
181                                 m->setGroups(Groups);
182                         }
183                         
184                         string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
185                         all = m->isTrue(temp);
186                         
187                         temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
188                         createPhylip = m->isTrue(temp);
189                         
190             temp = validParameter.validFile(parameters, "iters", false);                        if (temp == "not found") { temp = "1000"; }
191                         m->mothurConvert(temp, iters); 
192             
193             output = validParameter.validFile(parameters, "output", false);             
194             if(output == "not found"){  output = "lt"; }
195             else { createPhylip = true; }
196                         if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
197             
198             temp = validParameter.validFile(parameters, "subsample", false);            if (temp == "not found") { temp = "F"; }
199                         if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
200             else {  
201                 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
202                 else { subsample = false; }
203             }
204             
205             if (subsample == false) { iters = 0; }
206             
207                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
208                         m->setProcessors(temp);
209                         m->mothurConvert(temp, processors); 
210                         
211                         if (abort == false) {
212                         
213                                 ValidCalculators validCalculator;
214                                 int i;
215                                 
216                                 for (i=0; i<Estimators.size(); i++) {
217                                         if (validCalculator.isValidCalculator("sharedsummary", Estimators[i]) == true) { 
218                                                 if (Estimators[i] == "sharedsobs") { 
219                                                         sumCalculators.push_back(new SharedSobsCS());
220                                                 }else if (Estimators[i] == "sharedchao") { 
221                                                         sumCalculators.push_back(new SharedChao1());
222                                                 }else if (Estimators[i] == "sharedace") { 
223                                                         sumCalculators.push_back(new SharedAce());
224                                                 }else if (Estimators[i] == "jabund") {  
225                                                         sumCalculators.push_back(new JAbund());
226                                                 }else if (Estimators[i] == "sorabund") { 
227                                                         sumCalculators.push_back(new SorAbund());
228                                                 }else if (Estimators[i] == "jclass") { 
229                                                         sumCalculators.push_back(new Jclass());
230                                                 }else if (Estimators[i] == "sorclass") { 
231                                                         sumCalculators.push_back(new SorClass());
232                                                 }else if (Estimators[i] == "jest") { 
233                                                         sumCalculators.push_back(new Jest());
234                                                 }else if (Estimators[i] == "sorest") { 
235                                                         sumCalculators.push_back(new SorEst());
236                                                 }else if (Estimators[i] == "thetayc") { 
237                                                         sumCalculators.push_back(new ThetaYC());
238                                                 }else if (Estimators[i] == "thetan") { 
239                                                         sumCalculators.push_back(new ThetaN());
240                                                 }else if (Estimators[i] == "kstest") { 
241                                                         sumCalculators.push_back(new KSTest());
242                                                 }else if (Estimators[i] == "sharednseqs") { 
243                                                         sumCalculators.push_back(new SharedNSeqs());
244                                                 }else if (Estimators[i] == "ochiai") { 
245                                                         sumCalculators.push_back(new Ochiai());
246                                                 }else if (Estimators[i] == "anderberg") { 
247                                                         sumCalculators.push_back(new Anderberg());
248                                                 }else if (Estimators[i] == "kulczynski") { 
249                                                         sumCalculators.push_back(new Kulczynski());
250                                                 }else if (Estimators[i] == "kulczynskicody") { 
251                                                         sumCalculators.push_back(new KulczynskiCody());
252                                                 }else if (Estimators[i] == "lennon") { 
253                                                         sumCalculators.push_back(new Lennon());
254                                                 }else if (Estimators[i] == "morisitahorn") { 
255                                                         sumCalculators.push_back(new MorHorn());
256                                                 }else if (Estimators[i] == "braycurtis") { 
257                                                         sumCalculators.push_back(new BrayCurtis());
258                                                 }else if (Estimators[i] == "whittaker") { 
259                                                         sumCalculators.push_back(new Whittaker());
260                                                 }else if (Estimators[i] == "odum") { 
261                                                         sumCalculators.push_back(new Odum());
262                                                 }else if (Estimators[i] == "canberra") { 
263                                                         sumCalculators.push_back(new Canberra());
264                                                 }else if (Estimators[i] == "structeuclidean") { 
265                                                         sumCalculators.push_back(new StructEuclidean());
266                                                 }else if (Estimators[i] == "structchord") { 
267                                                         sumCalculators.push_back(new StructChord());
268                                                 }else if (Estimators[i] == "hellinger") { 
269                                                         sumCalculators.push_back(new Hellinger());
270                                                 }else if (Estimators[i] == "manhattan") { 
271                                                         sumCalculators.push_back(new Manhattan());
272                                                 }else if (Estimators[i] == "structpearson") { 
273                                                         sumCalculators.push_back(new StructPearson());
274                                                 }else if (Estimators[i] == "soergel") { 
275                                                         sumCalculators.push_back(new Soergel());
276                                                 }else if (Estimators[i] == "spearman") { 
277                                                         sumCalculators.push_back(new Spearman());
278                                                 }else if (Estimators[i] == "structkulczynski") { 
279                                                         sumCalculators.push_back(new StructKulczynski());
280                                                 }else if (Estimators[i] == "speciesprofile") { 
281                                                         sumCalculators.push_back(new SpeciesProfile());
282                                                 }else if (Estimators[i] == "hamming") { 
283                                                         sumCalculators.push_back(new Hamming());
284                                                 }else if (Estimators[i] == "structchi2") { 
285                                                         sumCalculators.push_back(new StructChi2());
286                                                 }else if (Estimators[i] == "gower") { 
287                                                         sumCalculators.push_back(new Gower());
288                                                 }else if (Estimators[i] == "memchi2") { 
289                                                         sumCalculators.push_back(new MemChi2());
290                                                 }else if (Estimators[i] == "memchord") { 
291                                                         sumCalculators.push_back(new MemChord());
292                                                 }else if (Estimators[i] == "memeuclidean") { 
293                                                         sumCalculators.push_back(new MemEuclidean());
294                                                 }else if (Estimators[i] == "mempearson") { 
295                                                         sumCalculators.push_back(new MemPearson());
296                                                 }
297                                         }
298                                 }
299                                 
300                                 mult = false;
301                         }
302                 }
303         }
304         catch(exception& e) {
305                 m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
306                 exit(1);
307         }
308 }
309 //**********************************************************************************************************************
310
311 int SummarySharedCommand::execute(){
312         try {
313         
314                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
315                 
316                 ofstream outputFileHandle, outAll;
317         map<string, string> variables; 
318                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
319                 string outputFileName = getOutputFileName("summary",variables);
320                 
321                 //if the users entered no valid calculators don't execute command
322                 if (sumCalculators.size() == 0) { return 0; }
323                 //check if any calcs can do multiples
324                 else{
325                         if (all){ 
326                                 for (int i = 0; i < sumCalculators.size(); i++) {
327                                         if (sumCalculators[i]->getMultiple() == true) { mult = true; }
328                                 }
329                         }
330                 }
331                         
332                 input = new InputData(sharedfile, "sharedfile");
333                 lookup = input->getSharedRAbundVectors();
334                 string lastLabel = lookup[0]->getLabel();
335         
336                 /******************************************************/
337                 //output headings for files
338                 /******************************************************/
339                 //output estimator names as column headers
340                 m->openOutputFile(outputFileName, outputFileHandle);
341                 outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
342                 for(int i=0;i<sumCalculators.size();i++){
343                         outputFileHandle << '\t' << sumCalculators[i]->getName();
344                         if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
345                 }
346                 outputFileHandle << endl;
347                 outputFileHandle.close();
348                 
349                 //create file and put column headers for multiple groups file
350         variables["[tag]"]= "multiple";
351                 string outAllFileName = getOutputFileName("summary",variables);
352                 if (mult == true) {
353                         m->openOutputFile(outAllFileName, outAll);
354                         outputNames.push_back(outAllFileName);
355                         
356                         outAll << "label" <<'\t' << "comparison" << '\t'; 
357                         for(int i=0;i<sumCalculators.size();i++){
358                                 if (sumCalculators[i]->getMultiple() == true) { 
359                                         outAll << '\t' << sumCalculators[i]->getName();
360                                 }
361                         }
362                         outAll << endl;
363                         outAll.close();
364                 }
365                 
366                 if (lookup.size() < 2) { 
367                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
368                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
369                         
370                         //close files and clean up
371                         m->mothurRemove(outputFileName);
372                         if (mult == true) { m->mothurRemove(outAllFileName);  }
373                         return 0;
374                 //if you only have 2 groups you don't need a .sharedmultiple file
375                 }else if ((lookup.size() == 2) && (mult == true)) { 
376                         mult = false;
377                         m->mothurRemove(outAllFileName);
378                         outputNames.pop_back();
379                 }
380                 
381                 if (m->control_pressed) {
382                         if (mult) {  m->mothurRemove(outAllFileName);  }
383                         m->mothurRemove(outputFileName); 
384                         delete input;
385                         for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
386                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
387                         m->clearGroups(); 
388                         return 0;
389                 }
390                 /******************************************************/
391         if (subsample) { 
392             if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
393                 subsampleSize = lookup[0]->getNumSeqs();
394                 for (int i = 1; i < lookup.size(); i++) {
395                     int thisSize = lookup[i]->getNumSeqs();
396                     
397                     if (thisSize < subsampleSize) {     subsampleSize = thisSize;       }
398                 }
399             }else {
400                 m->clearGroups();
401                 Groups.clear();
402                 vector<SharedRAbundVector*> temp;
403                 for (int i = 0; i < lookup.size(); i++) {
404                     if (lookup[i]->getNumSeqs() < subsampleSize) { 
405                         m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
406                         delete lookup[i];
407                     }else { 
408                         Groups.push_back(lookup[i]->getGroup()); 
409                         temp.push_back(lookup[i]);
410                     }
411                 } 
412                 lookup = temp;
413                 m->setGroups(Groups);
414             }
415             
416             if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
417         }
418
419                 
420                 /******************************************************/
421                 //comparison breakup to be used by different processes later
422                 numGroups = lookup.size();
423                 lines.resize(processors);
424                 for (int i = 0; i < processors; i++) {
425                         lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
426                         lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
427                 }               
428                 /******************************************************/
429                 
430                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
431                 set<string> processedLabels;
432                 set<string> userLabels = labels;
433                         
434                 //as long as you are not at the end of the file or done wih the lines you want
435                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
436                         if (m->control_pressed) {
437                                 if (mult) {  m->mothurRemove(outAllFileName);  }
438                                 m->mothurRemove(outputFileName); 
439                                 delete input; 
440                                 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
441                                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
442                                 m->clearGroups(); 
443                                 return 0;
444                         }
445
446                 
447                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
448                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
449                                 process(lookup, outputFileName, outAllFileName);
450                                 
451                                 processedLabels.insert(lookup[0]->getLabel());
452                                 userLabels.erase(lookup[0]->getLabel());
453                         }
454                         
455                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
456                                         string saveLabel = lookup[0]->getLabel();
457                                         
458                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
459                                         lookup = input->getSharedRAbundVectors(lastLabel);
460
461                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
462                                         process(lookup, outputFileName, outAllFileName);
463                                         
464                                         processedLabels.insert(lookup[0]->getLabel());
465                                         userLabels.erase(lookup[0]->getLabel());
466                                         
467                                         //restore real lastlabel to save below
468                                         lookup[0]->setLabel(saveLabel);
469                         }
470                         
471                         lastLabel = lookup[0]->getLabel();                      
472                                 
473                         //get next line to process
474                         //prevent memory leak
475                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
476                         lookup = input->getSharedRAbundVectors();
477                 }
478                 
479                 if (m->control_pressed) {
480                         if (mult) { m->mothurRemove(outAllFileName);  }
481                         m->mothurRemove(outputFileName); 
482                         delete input; 
483                         for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
484                         m->clearGroups(); 
485                         return 0;
486                 }
487
488                 //output error messages about any remaining user labels
489                 set<string>::iterator it;
490                 bool needToRun = false;
491                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
492                         m->mothurOut("Your file does not include the label " + *it); 
493                         if (processedLabels.count(lastLabel) != 1) {
494                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495                                 needToRun = true;
496                         }else {
497                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
498                         }
499                 }
500                 
501                 //run last label if you need to
502                 if (needToRun == true)  {
503                                 for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } } 
504                                 lookup = input->getSharedRAbundVectors(lastLabel);
505
506                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
507                                 process(lookup, outputFileName, outAllFileName);
508                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
509                 }
510                 
511                                 
512                 //reset groups parameter
513                 m->clearGroups();  
514                 
515                 for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
516                 delete input;  
517                 
518                 if (m->control_pressed) {
519                         m->mothurRemove(outAllFileName);  
520                         m->mothurRemove(outputFileName); 
521                         return 0;
522                 }
523                 
524                 m->mothurOutEndLine();
525                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
526                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
527                 if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
528                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
529                 m->mothurOutEndLine();
530
531                 return 0;
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "SummarySharedCommand", "execute");
535                 exit(1);
536         }
537 }
538 /***********************************************************/
539 int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
540         try {
541                 
542                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
543                 
544                 //output num seqs
545                 out << simMatrix.size() << endl;
546                 
547                 if (output == "lt") {
548                         for (int b = 0; b < simMatrix.size(); b++)      {
549                                 out << lookup[b]->getGroup() << '\t';
550                                 for (int n = 0; n < b; n++)     {
551                     if (m->control_pressed) { return 0; }
552                                         out << simMatrix[b][n] << '\t'; 
553                                 }
554                                 out << endl;
555                         }
556                 }else{
557                         for (int b = 0; b < simMatrix.size(); m++)      {
558                                 out << lookup[b]->getGroup() << '\t';
559                                 for (int n = 0; n < simMatrix[b].size(); n++)   {
560                     if (m->control_pressed) { return 0; }
561                                         out << simMatrix[b][n] << '\t'; 
562                                 }
563                                 out << endl;
564                         }
565                 }
566         
567         return 0;
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "SummarySharedCommand", "printSims");
571                 exit(1);
572         }
573 }
574 /***********************************************************/
575 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
576         try {
577         vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
578         vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());          
579         
580         for (int thisIter = 0; thisIter < iters+1; thisIter++) {
581             
582             vector<SharedRAbundVector*> thisItersLookup = thisLookup;
583             
584             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
585                 SubSample sample;
586                 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
587                 
588                 //make copy of lookup so we don't get access violations
589                 vector<SharedRAbundVector*> newLookup;
590                 for (int k = 0; k < thisItersLookup.size(); k++) {
591                     SharedRAbundVector* temp = new SharedRAbundVector();
592                     temp->setLabel(thisItersLookup[k]->getLabel());
593                     temp->setGroup(thisItersLookup[k]->getGroup());
594                     newLookup.push_back(temp);
595                 }
596                 
597                 //for each bin
598                 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
599                     if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
600                     for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
601                 }
602                 
603                 tempLabels = sample.getSample(newLookup, subsampleSize);
604                 thisItersLookup = newLookup;
605             }
606         
607             
608             if(processors == 1){
609                 driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
610                 m->appendFiles((sumFileName + ".temp"), sumFileName);
611                 m->mothurRemove((sumFileName + ".temp"));
612                 if (mult) {
613                     m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
614                     m->mothurRemove((sumAllFileName + ".temp"));
615                 }
616             }else{
617                 
618                 int process = 1;
619                 vector<int> processIDS;
620                 
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
622                 //loop through and create all the processes you want
623                 while (process != processors) {
624                     int pid = fork();
625                     
626                     if (pid > 0) {
627                         processIDS.push_back(pid); 
628                         process++;
629                     }else if (pid == 0){
630                         driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
631                         
632                         //only do this if you want a distance file
633                         if (createPhylip) {
634                             string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
635                             ofstream outtemp;
636                             m->openOutputFile(tempdistFileName, outtemp);
637                             
638                             for (int i = 0; i < calcDists.size(); i++) {
639                                 outtemp << calcDists[i].size() << endl;
640                                 
641                                 for (int j = 0; j < calcDists[i].size(); j++) {
642                                     outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
643                                 }
644                             }
645                             outtemp.close();
646                         }
647                         
648                         exit(0);
649                     }else { 
650                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
651                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
652                         exit(0);
653                     }
654                 }
655                 
656                 //parent do your part
657                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
658                 m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
659                 m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
660                 if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
661                 
662                 //force parent to wait until all the processes are done
663                 for (int i = 0; i < processIDS.size(); i++) {
664                     int temp = processIDS[i];
665                     wait(&temp);
666                 }
667                 
668                 for (int i = 0; i < processIDS.size(); i++) {
669                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
670                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
671                     if (mult) { m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
672                     
673                     if (createPhylip) {
674                         string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
675                         ifstream intemp;
676                         m->openInputFile(tempdistFileName, intemp);
677                         
678                         for (int k = 0; k < calcDists.size(); k++) {
679                             int size = 0;
680                             intemp >> size; m->gobble(intemp);
681                             
682                             for (int j = 0; j < size; j++) {
683                                 int seq1 = 0;
684                                 int seq2 = 0;
685                                 float dist = 1.0;
686                                 
687                                 intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
688                                 
689                                 seqDist tempDist(seq1, seq2, dist);
690                                 calcDists[k].push_back(tempDist);
691                             }
692                         }
693                         intemp.close();
694                         m->mothurRemove(tempdistFileName);
695                     }
696                 }
697 #else
698                 //////////////////////////////////////////////////////////////////////////////////////////////////////
699                 //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
700                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
701                 //Taking advantage of shared memory to pass results vectors.
702                 //////////////////////////////////////////////////////////////////////////////////////////////////////
703                 
704                 vector<summarySharedData*> pDataArray; 
705                 DWORD   dwThreadIdArray[processors-1];
706                 HANDLE  hThreadArray[processors-1];
707                 
708                 //Create processor worker threads.
709                 for( int i=1; i<processors; i++ ){
710                     
711                     //make copy of lookup so we don't get access violations
712                     vector<SharedRAbundVector*> newLookup;
713                     for (int k = 0; k < thisLookup.size(); k++) {
714                         SharedRAbundVector* temp = new SharedRAbundVector();
715                         temp->setLabel(thisLookup[k]->getLabel());
716                         temp->setGroup(thisLookup[k]->getGroup());
717                         newLookup.push_back(temp);
718                     }
719                 
720                     
721                     //for each bin
722                     for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
723                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
724                         for (int j = 0; j < thisLookup.size(); j++) { newLookup[j]->push_back(thisLookup[j]->getAbundance(k), thisLookup[j]->getGroup()); }
725                     }
726                     
727                     // Allocate memory for thread data.
728                     summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
729                     pDataArray.push_back(tempSum);
730                     processIDS.push_back(i);
731                     
732                     hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
733                 }
734                 
735                 //parent do your part
736                 driver(thisLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
737                 m->appendFiles((sumFileName + "0.temp"), sumFileName);
738                 m->mothurRemove((sumFileName + "0.temp"));
739                 if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); }
740                 
741                 //Wait until all threads have terminated.
742                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
743                 
744                 //Close all thread handles and free memory allocations.
745                 for(int i=0; i < pDataArray.size(); i++){
746                     if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
747                         m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
748                     }
749                     m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
750                     m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
751                     
752                     for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
753                     
754                     if (createPhylip) {
755                         for (int k = 0; k < calcDists.size(); k++) {
756                             int size = pDataArray[i]->calcDists[k].size();
757                             for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
758                         }
759                     }
760                     
761                     CloseHandle(hThreadArray[i]);
762                     delete pDataArray[i];
763                 }
764                 
765 #endif
766             }
767             
768             if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
769                 
770                 calcDistsTotals.push_back(calcDists); 
771                 //clean up memory
772                 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
773                 thisItersLookup.clear();
774             }else {
775                 if (createPhylip) {
776                     for (int i = 0; i < calcDists.size(); i++) {
777                         if (m->control_pressed) { break; }
778                         
779                         //initialize matrix
780                         vector< vector<double> > matrix; //square matrix to represent the distance
781                         matrix.resize(thisLookup.size());
782                         for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
783                         
784                         for (int j = 0; j < calcDists[i].size(); j++) {
785                             int row = calcDists[i][j].seq1;
786                             int column = calcDists[i][j].seq2;
787                             double dist = calcDists[i][j].dist;
788                             
789                             matrix[row][column] = dist;
790                             matrix[column][row] = dist;
791                         }
792                         
793                         map<string, string> variables; 
794                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
795                         variables["[calc]"] = sumCalculators[i]->getName();
796                         variables["[distance]"] = thisLookup[0]->getLabel();
797                         variables["[outputtag]"] = output;
798                         variables["[tag2]"] = "";
799                         string distFileName = getOutputFileName("phylip",variables);
800                         outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
801                         ofstream outDist;
802                         m->openOutputFile(distFileName, outDist);
803                         outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
804                         
805                         printSims(outDist, matrix);
806                         
807                         outDist.close();
808                     }
809                 }
810             }
811             for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
812                 }
813
814         if (iters != 0) {
815             //we need to find the average distance and standard deviation for each groups distance
816             vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
817             
818             //find standard deviation
819             vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
820             
821             //print results
822             for (int i = 0; i < calcDists.size(); i++) {
823                 vector< vector<double> > matrix; //square matrix to represent the distance
824                 matrix.resize(thisLookup.size());
825                 for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
826                 
827                 vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
828                 stdmatrix.resize(thisLookup.size());
829                 for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
830                 
831                 
832                 for (int j = 0; j < calcAverages[i].size(); j++) {
833                     int row = calcAverages[i][j].seq1;
834                     int column = calcAverages[i][j].seq2;
835                     float dist = calcAverages[i][j].dist;
836                     float stdDist = stdDev[i][j].dist;
837                     
838                     matrix[row][column] = dist;
839                     matrix[column][row] = dist;
840                     stdmatrix[row][column] = stdDist;
841                     stdmatrix[column][row] = stdDist;
842                 }
843                 
844                 map<string, string> variables; 
845                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
846                 variables["[calc]"] = sumCalculators[i]->getName();
847                 variables["[distance]"] = thisLookup[0]->getLabel();
848                 variables["[outputtag]"] = output;
849                 variables["[tag2]"] = "ave";
850                 string distFileName = getOutputFileName("phylip",variables);
851                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
852                 ofstream outAve;
853                 m->openOutputFile(distFileName, outAve);
854                 outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
855                 
856                 printSims(outAve, matrix);
857                 
858                 outAve.close();
859                 
860                 variables["[tag2]"] = "std";
861                 distFileName = getOutputFileName("phylip",variables);
862                 outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
863                 ofstream outSTD;
864                 m->openOutputFile(distFileName, outSTD);
865                 outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
866                 
867                 printSims(outSTD, stdmatrix);
868                 
869                 outSTD.close();
870                 
871             }
872         }
873         
874         return 0;
875         }
876         catch(exception& e) {
877                 m->errorOut(e, "SummarySharedCommand", "process");
878                 exit(1);
879         }
880 }
881 /**************************************************************************************************/
882 int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
883         try {
884                 
885                 //loop through calculators and add to file all for all calcs that can do mutiple groups
886                 if (mult == true) {
887                         ofstream outAll;
888                         m->openOutputFile(sumAllFile, outAll);
889                         
890                         //output label
891                         outAll << thisLookup[0]->getLabel() << '\t';
892                         
893                         //output groups names
894                         string outNames = "";
895                         for (int j = 0; j < thisLookup.size(); j++) {
896                                 outNames += thisLookup[j]->getGroup() +  "-";
897                         }
898                         outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
899                         outAll << outNames << '\t';
900                         
901                         for(int i=0;i<sumCalculators.size();i++){
902                                 if (sumCalculators[i]->getMultiple() == true) { 
903                                         sumCalculators[i]->getValues(thisLookup);
904                                         
905                                         if (m->control_pressed) { outAll.close(); return 1; }
906                                         
907                                         outAll << '\t';
908                                         sumCalculators[i]->print(outAll);
909                                 }
910                         }
911                         outAll << endl;
912                         outAll.close();
913                 }
914                 
915                 ofstream outputFileHandle;
916                 m->openOutputFile(sumFile, outputFileHandle);
917                 
918                 vector<SharedRAbundVector*> subset;
919                 for (int k = start; k < end; k++) { // pass cdd each set of groups to compare
920
921                         for (int l = 0; l < k; l++) {
922                                 
923                                 outputFileHandle << thisLookup[0]->getLabel() << '\t';
924                                 
925                                 subset.clear(); //clear out old pair of sharedrabunds
926                                 //add new pair of sharedrabunds
927                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
928                                 
929                                 //sort groups to be alphanumeric
930                                 if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
931                                         outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
932                                 }else{
933                                         outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
934                                 }
935                                 
936                                 for(int i=0;i<sumCalculators.size();i++) {
937                                         
938                                         //if this calc needs all groups to calculate the pair load all groups
939                                         if (sumCalculators[i]->getNeedsAll()) { 
940                                                 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
941                                                 for (int w = 0; w < thisLookup.size(); w++) {
942                                                         if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
943                                                 }
944                                         }
945                                         
946                                         vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
947                                         
948                                         if (m->control_pressed) { outputFileHandle.close(); return 1; }
949                                         
950                                         outputFileHandle << '\t';
951                                         sumCalculators[i]->print(outputFileHandle);
952                                         
953                                         seqDist temp(l, k, tempdata[0]);
954                                         calcDists[i].push_back(temp);
955                                 }
956                                 outputFileHandle << endl;
957                         }
958                 }
959                 
960                 outputFileHandle.close();
961                 
962                 return 0;
963         }
964         catch(exception& e) {
965                 m->errorOut(e, "SummarySharedCommand", "driver");
966                 exit(1);
967         }
968 }
969 /**************************************************************************************************/
970
971