]> git.donarmstrong.com Git - mothur.git/blob - anosimcommand.cpp
finished anosim command
[mothur.git] / anosimcommand.cpp
1 /*
2  *  anosimcommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/14/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "anosimcommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
35 #include "odum.h"
36 #include "canberra.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
42 #include "soergel.h"
43 #include "spearman.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
47 #include "hamming.h"
48 #include "gower.h"
49 #include "memchi2.h"
50 #include "memchord.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
53
54 //**********************************************************************************************************************
55 vector<string> AnosimCommand::getValidParameters(){     
56         try {
57                 string Array[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "AnosimCommand", "getValidParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 AnosimCommand::AnosimCommand(){ 
68         try {
69                 abort = true; calledHelp = true; 
70                 vector<string> tempOutNames;
71                 outputTypes["anosim"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 vector<string> AnosimCommand::getRequiredParameters(){  
80         try {
81                 string Array[] =  {"design"};
82                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
83                 return myArray;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "AnosimCommand", "getRequiredParameters");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91 vector<string> AnosimCommand::getRequiredFiles(){       
92         try {
93                 string Array[] =  {};
94                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
95                 return myArray;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "AnosimCommand", "getRequiredFiles");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103
104 AnosimCommand::AnosimCommand(string option) {
105         try {
106                 globaldata = GlobalData::getInstance();
107                 abort = false; calledHelp = false;   
108                 allLines = 1;
109                 labels.clear();
110                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 
114                 else {
115                         //valid paramters for this command
116                         string AlignArray[] =  {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         
124                         //check to make sure all parameters are valid for command
125                         map<string,string>::iterator it;
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["anosim"] = tempOutNames;
133                         
134                         //if the user changes the output directory command factory will send this info to us in the output parameter 
135                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
136                         
137                         //if the user changes the input directory command factory will send this info to us in the output parameter 
138                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
139                         if (inputDir == "not found"){   inputDir = "";          }
140                         else {
141                                 string path;
142                                 it = parameters.find("design");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
148                                 }
149                                 
150                                 it = parameters.find("phylip");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
156                                 }
157                         }
158                         
159                         phylipfile = validParameter.validFile(parameters, "phylip", true);
160                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161                         else if (phylipfile == "not found") { phylipfile = ""; }        
162                         else {  globaldata->newRead(); format = "phylip";  globaldata->setPhylipFile(phylipfile);       }
163                         
164                         //check for required parameters
165                         designfile = validParameter.validFile(parameters, "design", true);
166                         if (designfile == "not open") { abort = true; }
167                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }       
168                         
169                         //make sure the user has already run the read.otu command
170                         if ((globaldata->getSharedFile() == "")) {
171                                 if ((phylipfile == "")) {
172                                         m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the anosim command."); m->mothurOutEndLine(); abort = true; 
173                                 }
174                         }else { sharedfile = globaldata->getSharedFile(); } 
175                         
176                         //use distance matrix if one is provided
177                         if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = ""; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188                         //if the user has not specified any labels use the ones from read.otu
189                         if (label == "") {  
190                                 allLines = globaldata->allLines; 
191                                 labels = globaldata->labels; 
192                         }
193                         
194                         groups = validParameter.validFile(parameters, "groups", false);                 
195                         if (groups == "not found") { groups = "";  }
196                         else { 
197                                 m->splitAtDash(groups, Groups);
198                                 globaldata->Groups = Groups;
199                         }
200                         
201                         sets = validParameter.validFile(parameters, "sets", false);                     
202                         if (sets == "not found") { sets = ""; pickedGroups = false; }
203                         else { 
204                                 pickedGroups = true;
205                                 m->splitAtDash(sets, Sets);
206                         }
207                         
208                         
209                         string temp = validParameter.validFile(parameters, "iters", false);                     if (temp == "not found") { temp = "1000"; }
210                         convert(temp, iters); 
211                         
212                         temp = validParameter.validFile(parameters, "processors", false);                       if (temp == "not found"){       temp = "1";     }
213                         convert(temp, processors); 
214                         
215                         vector<string> Estimators;
216                         calc = validParameter.validFile(parameters, "calc", false);                     
217                         if (calc == "not found") { calc = "morisitahorn";  }
218                         m->splitAtDash(calc, Estimators);
219                         
220                         if (abort == false) {
221                                 
222                                 ValidCalculators* validCalculator = new ValidCalculators();
223                                 
224                                 for (int i=0; i<Estimators.size(); i++) {
225                                         if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) { 
226                                                 if (Estimators[i] == "sharedsobs") { 
227                                                         calculators.push_back(new SharedSobsCS());
228                                                 }else if (Estimators[i] == "sharedchao") { 
229                                                         calculators.push_back(new SharedChao1());
230                                                 }else if (Estimators[i] == "sharedace") { 
231                                                         calculators.push_back(new SharedAce());
232                                                 }else if (Estimators[i] == "jabund") {  
233                                                         calculators.push_back(new JAbund());
234                                                 }else if (Estimators[i] == "sorabund") { 
235                                                         calculators.push_back(new SorAbund());
236                                                 }else if (Estimators[i] == "jclass") { 
237                                                         calculators.push_back(new Jclass());
238                                                 }else if (Estimators[i] == "sorclass") { 
239                                                         calculators.push_back(new SorClass());
240                                                 }else if (Estimators[i] == "jest") { 
241                                                         calculators.push_back(new Jest());
242                                                 }else if (Estimators[i] == "sorest") { 
243                                                         calculators.push_back(new SorEst());
244                                                 }else if (Estimators[i] == "thetayc") { 
245                                                         calculators.push_back(new ThetaYC());
246                                                 }else if (Estimators[i] == "thetan") { 
247                                                         calculators.push_back(new ThetaN());
248                                                 }else if (Estimators[i] == "kstest") { 
249                                                         calculators.push_back(new KSTest());
250                                                 }else if (Estimators[i] == "sharednseqs") { 
251                                                         calculators.push_back(new SharedNSeqs());
252                                                 }else if (Estimators[i] == "ochiai") { 
253                                                         calculators.push_back(new Ochiai());
254                                                 }else if (Estimators[i] == "anderberg") { 
255                                                         calculators.push_back(new Anderberg());
256                                                 }else if (Estimators[i] == "kulczynski") { 
257                                                         calculators.push_back(new Kulczynski());
258                                                 }else if (Estimators[i] == "kulczynskicody") { 
259                                                         calculators.push_back(new KulczynskiCody());
260                                                 }else if (Estimators[i] == "lennon") { 
261                                                         calculators.push_back(new Lennon());
262                                                 }else if (Estimators[i] == "morisitahorn") { 
263                                                         calculators.push_back(new MorHorn());
264                                                 }else if (Estimators[i] == "braycurtis") { 
265                                                         calculators.push_back(new BrayCurtis());
266                                                 }else if (Estimators[i] == "whittaker") { 
267                                                         calculators.push_back(new Whittaker());
268                                                 }else if (Estimators[i] == "odum") { 
269                                                         calculators.push_back(new Odum());
270                                                 }else if (Estimators[i] == "canberra") { 
271                                                         calculators.push_back(new Canberra());
272                                                 }else if (Estimators[i] == "structeuclidean") { 
273                                                         calculators.push_back(new StructEuclidean());
274                                                 }else if (Estimators[i] == "structchord") { 
275                                                         calculators.push_back(new StructChord());
276                                                 }else if (Estimators[i] == "hellinger") { 
277                                                         calculators.push_back(new Hellinger());
278                                                 }else if (Estimators[i] == "manhattan") { 
279                                                         calculators.push_back(new Manhattan());
280                                                 }else if (Estimators[i] == "structpearson") { 
281                                                         calculators.push_back(new StructPearson());
282                                                 }else if (Estimators[i] == "soergel") { 
283                                                         calculators.push_back(new Soergel());
284                                                 }else if (Estimators[i] == "spearman") { 
285                                                         calculators.push_back(new Spearman());
286                                                 }else if (Estimators[i] == "structkulczynski") { 
287                                                         calculators.push_back(new StructKulczynski());
288                                                 }else if (Estimators[i] == "speciesprofile") { 
289                                                         calculators.push_back(new SpeciesProfile());
290                                                 }else if (Estimators[i] == "hamming") { 
291                                                         calculators.push_back(new Hamming());
292                                                 }else if (Estimators[i] == "structchi2") { 
293                                                         calculators.push_back(new StructChi2());
294                                                 }else if (Estimators[i] == "gower") { 
295                                                         calculators.push_back(new Gower());
296                                                 }else if (Estimators[i] == "memchi2") { 
297                                                         calculators.push_back(new MemChi2());
298                                                 }else if (Estimators[i] == "memchord") { 
299                                                         calculators.push_back(new MemChord());
300                                                 }else if (Estimators[i] == "memeuclidean") { 
301                                                         calculators.push_back(new MemEuclidean());
302                                                 }else if (Estimators[i] == "mempearson") { 
303                                                         calculators.push_back(new MemPearson());
304                                                 }
305                                         }
306                                 }
307                         }
308                 }
309                 
310         }
311         catch(exception& e) {
312                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
313                 exit(1);
314         }
315 }
316
317 //**********************************************************************************************************************
318
319 void AnosimCommand::help(){
320         try {
321                 m->mothurOut("The anosim command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
322                 m->mothurOut("The anosim command outputs a .anosim file. \n");
323                 m->mothurOut("The anosim command parameters are phylip, iters, groups, label, design, sets and processors.  The design parameter is required.\n");
324                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running anosim. It is required. \n");
325                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
326                 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
327                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
328                 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
329                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
330                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
331                 m->mothurOut("The anosim command should be in the following format: anosim(design=yourDesignFile).\n");
332                 m->mothurOut("Example anosim(design=temp.design, groups=A-B-C).\n");
333                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
334                 
335         }
336         catch(exception& e) {
337                 m->errorOut(e, "AnosimCommand", "help");
338                 exit(1);
339         }
340 }
341
342 //**********************************************************************************************************************
343
344 AnosimCommand::~AnosimCommand(){}
345
346 //**********************************************************************************************************************
347
348 int AnosimCommand::execute(){
349         try {
350                 
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 //read design file
354                 designMap = new GroupMap(designfile);
355                 designMap->readDesignMap();
356                 
357                 //make sure sets are all in designMap
358                 SharedUtil* util = new SharedUtil();  
359                 util->setGroups(Sets, designMap->namesOfGroups);  
360                 delete util;
361                 
362                 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
363                 int numGroups = Sets.size();
364                 if (pickedGroups) {
365                         for (int a=0; a<numGroups; a++) { 
366                                 for (int l = 0; l < a; l++) {   
367                                         vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
368                                         namesOfGroupCombos.push_back(groups);
369                                 }
370                         }
371                 }else { //one giant compare
372                         vector<string> groups;
373                         for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
374                         namesOfGroupCombos.push_back(groups);
375                 }
376                 
377                 //only 1 combo
378                 if (numGroups == 2) { processors = 1; }
379                 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
380                 
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
382                 if(processors != 1){
383                         int numPairs = namesOfGroupCombos.size();
384                         int numPairsPerProcessor = numPairs / processors;
385                         
386                         for (int i = 0; i < processors; i++) {
387                                 int startPos = i * numPairsPerProcessor;
388                                 if(i == processors - 1){
389                                         numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
390                                 }
391                                 lines.push_back(linePair(startPos, numPairsPerProcessor));
392                         }
393                 }
394 #endif
395                 
396                 if (sharedfile != "") { //create distance matrix for each label
397                         
398                         if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
399                         
400                         //for each calculator
401                         for(int i = 0 ; i < calculators.size(); i++) {
402                                 
403                                 //create a new filename
404                                 ofstream out;
405                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim";                           
406                                 m->openOutputFile(outputFile, out);
407                                 outputNames.push_back(outputFile); outputTypes["anosim"].push_back(outputFile);
408                                 
409                                 //print headers
410                                 out << "label\tgroupsCompared\tRValue\tpValue" << endl;  
411                                 m->mothurOut("label\tgroupsCompared\tRValue\tpValue"); m->mothurOutEndLine();  
412                                 out.close();
413                         }
414                         
415                         InputData input(sharedfile, "sharedfile");
416                         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417                         string lastLabel = lookup[0]->getLabel();
418                         
419                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420                         set<string> processedLabels;
421                         set<string> userLabels = labels;
422                         
423                         //sanity check between shared file groups and design file groups
424                         for (int i = 0; i < lookup.size(); i++) { 
425                                 string group = designMap->getGroup(lookup[i]->getGroup());
426                                 
427                                 if (group == "not found") { m->control_pressed = true;  m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine();  }
428                         }
429                         
430                         //as long as you are not at the end of the file or done wih the lines you want
431                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
432                                 
433                                 if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } return 0; }
434                                 
435                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
436                                         
437                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
438                                         process(lookup);
439                                         
440                                         processedLabels.insert(lookup[0]->getLabel());
441                                         userLabels.erase(lookup[0]->getLabel());
442                                 }
443                                 
444                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445                                         string saveLabel = lookup[0]->getLabel();
446                                         
447                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
448                                         lookup = input.getSharedRAbundVectors(lastLabel);
449                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
450                                         
451                                         process(lookup);
452                                         
453                                         processedLabels.insert(lookup[0]->getLabel());
454                                         userLabels.erase(lookup[0]->getLabel());
455                                         
456                                         //restore real lastlabel to save below
457                                         lookup[0]->setLabel(saveLabel);
458                                 }
459                                 
460                                 lastLabel = lookup[0]->getLabel();
461                                 //prevent memory leak
462                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
463                                 
464                                 if (m->control_pressed) {  globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {      remove(outputNames[i].c_str()); } return 0; }
465                                 
466                                 //get next line to process
467                                 lookup = input.getSharedRAbundVectors();                                
468                         }
469                         
470                         if (m->control_pressed) { globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }  return 0; }
471                         
472                         //output error messages about any remaining user labels
473                         set<string>::iterator it;
474                         bool needToRun = false;
475                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
476                                 m->mothurOut("Your file does not include the label " + *it); 
477                                 if (processedLabels.count(lastLabel) != 1) {
478                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
479                                         needToRun = true;
480                                 }else {
481                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
482                                 }
483                         }
484                         
485                         //run last label if you need to
486                         if (needToRun == true)  {
487                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
488                                 lookup = input.getSharedRAbundVectors(lastLabel);
489                                 
490                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
491                                 
492                                 process(lookup);
493                                 
494                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
495                         }
496                         
497                         //reset groups parameter
498                         globaldata->Groups.clear();  
499                         
500                         
501                 }else { //user provided distance matrix
502                         
503                         if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
504                         
505                         //create a new filename
506                         ofstream out;
507                         string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile))  + "anosim";                               
508                         m->openOutputFile(outputFile, out);
509                         outputNames.push_back(outputFile); outputTypes["anosim"].push_back(outputFile);
510                         
511                         //print headers
512                         out << "groupsCompared\tRValue\tpValue" << endl; 
513                         m->mothurOut("groupsCompared\tRValue\tpValue"); m->mothurOutEndLine();  
514                         out.close();
515                         
516                         ReadPhylipVector readMatrix(phylipfile);
517                         vector< vector<double> > completeMatrix;
518                         vector<string> names = readMatrix.read(completeMatrix);
519                         
520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
521                         if(processors == 1){
522                                 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
523                         }else{
524                                 int process = 1;
525                                 vector<int> processIDS;
526                                 
527                                 //loop through and create all the processes you want
528                                 while (process != processors) {
529                                         int pid = fork();
530                                         
531                                         if (pid > 0) {
532                                                 processIDS.push_back(pid);  
533                                                 process++;
534                                         }else if (pid == 0){
535                                                 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
536                                                 exit(0);
537                                         }else { 
538                                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
539                                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
540                                                 exit(0);
541                                         }
542                                 }
543                                 
544                                 //do my part
545                                 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
546                                 
547                                 //force parent to wait until all the processes are done
548                                 for (int i=0;i<(processors-1);i++) { 
549                                         int temp = processIDS[i];
550                                         wait(&temp);
551                                 }
552                                 
553                                 //append files
554                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "anosim";                                
555                                 for (int j = 0; j < processIDS.size(); j++) {
556                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
557                                         remove((outputFile + toString(processIDS[j])).c_str());
558                                 }
559                                 
560                         }
561 #else
562                         driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
563 #endif
564                         
565                 }
566                 
567                 delete designMap;
568                 
569                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
570                 
571                 m->mothurOutEndLine();
572                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
573                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
574                 m->mothurOutEndLine();
575                 
576                 return 0;
577         }
578         catch(exception& e) {
579                 m->errorOut(e, "AnosimCommand", "execute");
580                 exit(1);
581         }
582 }
583 //**********************************************************************************************************************
584
585 int AnosimCommand::process(vector<SharedRAbundVector*> thisLookup) {
586         try{
587                 
588 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589                 if(processors == 1){
590                         driver(0, namesOfGroupCombos.size(), thisLookup, "");
591                 }else{
592                         int process = 1;
593                         vector<int> processIDS;
594                         
595                         //loop through and create all the processes you want
596                         while (process != processors) {
597                                 int pid = fork();
598                                 
599                                 if (pid > 0) {
600                                         processIDS.push_back(pid);  
601                                         process++;
602                                 }else if (pid == 0){
603                                         driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
604                                         exit(0);
605                                 }else { 
606                                         m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
607                                         for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
608                                         exit(0);
609                                 }
610                         }
611                         
612                         //do my part
613                         driver(lines[0].start, lines[0].num, thisLookup, "");
614                         
615                         //force parent to wait until all the processes are done
616                         for (int i=0;i<(processors-1);i++) { 
617                                 int temp = processIDS[i];
618                                 wait(&temp);
619                         }
620                         
621                         //append files
622                         for(int i = 0 ; i < calculators.size(); i++) {
623                                 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim";                           
624                                 
625                                 for (int j = 0; j < processIDS.size(); j++) {
626                                         m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
627                                         remove((outputFile + toString(processIDS[j])).c_str());
628                                 }
629                         }
630                 }
631 #else
632                 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
633 #endif
634                 
635                 return 0;
636         }
637         catch(exception& e) {
638                 m->errorOut(e, "AnosimCommand", "process");
639                 exit(1);
640         }
641 }
642 //**********************************************************************************************************************
643
644 int AnosimCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
645         try {
646                 vector<SharedRAbundVector*> subset;
647                 EstOutput data;
648                 
649                 //for each calculator
650                 for(int i = 0 ; i < calculators.size(); i++) {
651                         
652                         //create a new filename
653                         ofstream out;
654                         string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim" + pidValue;                                
655                         m->openOutputFileAppend(outputFile, out);
656                         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
657                         cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
658                         
659                         //for each combo
660                         for (int c = start; c < (start+num); c++) {
661                                 
662                                 if (m->control_pressed) { out.close(); return 0; }
663                                 
664                                 //get set names
665                                 vector<string> setNames;
666                                 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
667                                 
668                                 vector<SharedRAbundVector*> thisCombosLookup;
669                                 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
670                                 for (int k = 0; k < thisLookup.size(); k++) {
671                                         string thisGroup = thisLookup[k]->getGroup();
672                                         
673                                         //is this group for a set we want to compare??
674                                         if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {  
675                                                 thisCombosLookup.push_back(thisLookup[k]);
676                                                 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
677                                         }
678                                         
679                                 }
680                                 
681                                 int numGroups = thisCombosLookup.size();
682                                 
683                                 //calc the distance matrix
684                                 matrix.clear();
685                                 matrix.resize(numGroups);
686                                 for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
687                                 
688                                 if (thisCombosLookup.size() == 0)  { 
689                                         m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine(); 
690                                 }else{
691                                         
692                                         out << thisLookup[0]->getLabel() << '\t';
693                                         m->mothurOut(thisLookup[0]->getLabel() + "\t");
694                                         if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; m->mothurOut(setNames[0] + "-" + setNames[1] + "\t");  }
695                                         else { out << "all" << '\t'; m->mothurOut("all\t"); }
696                                         
697                                         for (int k = 0; k < thisCombosLookup.size(); k++) { 
698                                                 for (int l = k; l < thisCombosLookup.size(); l++) {
699                                                         
700                                                         if (m->control_pressed) { out.close(); return 0; }
701                                                         
702                                                         if (k != l) { //we dont need to similiarity of a groups to itself
703                                                                 //get estimated similarity between 2 groups
704                                                                 subset.clear(); //clear out old pair of sharedrabunds
705                                                                 //add new pair of sharedrabunds
706                                                                 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]); 
707                                                                 
708                                                                 //if this calc needs all groups to calculate the pair load all groups
709                                                                 if (calculators[i]->getNeedsAll()) { 
710                                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
711                                                                         for (int w = 0; w < thisCombosLookup.size(); w++) {
712                                                                                 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
713                                                                         }
714                                                                 }
715                                                                 
716                                                                 data = calculators[i]->getValues(subset); //saves the calculator outputs
717                                                                 
718                                                                 //save values in similarity matrix
719                                                                 matrix[k][l] = 1.0 - data[0];
720                                                                 matrix[l][k] = 1.0 - data[0];
721                                                         }
722                                                 }
723                                         }
724                                         
725                                         //calc anosim
726                                         calcAnosim(out, setNames.size(), thisCombosLookupSets);
727                                 }
728                         }
729                         
730                         out.close();
731                 }               
732                 
733                 return 0;
734                 
735         }
736         catch(exception& e) {
737                 m->errorOut(e, "AnosimCommand", "driver");
738                 exit(1);
739         }
740 }
741 //**********************************************************************************************************************
742
743 int AnosimCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
744         try {
745                 
746                 //create a new filename
747                 ofstream out;
748                 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "anosim" + pidValue;                             
749                 m->openOutputFileAppend(outputFile, out);
750                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
751                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
752                 
753                 //for each combo
754                 for (int c = start; c < (start+num); c++) {
755                         
756                         if (m->control_pressed) { out.close(); return 0; }
757                         
758                         //get set names
759                         vector<string> setNames;
760                         for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
761                         
762                         vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
763                         set<int> indexes;
764                         for (int k = 0; k < names.size(); k++) {
765                                 //is this group for a set we want to compare??
766                                 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {  
767                                         thisCombosSets.push_back(designMap->getGroup(names[k]));        
768                                         indexes.insert(k); //save indexes of valid rows in matrix for submatrix
769                                 }
770                         }
771                         
772                         int numGroups = thisCombosSets.size();
773                         
774                         //calc the distance matrix
775                         matrix.clear();
776                         matrix.resize(numGroups);
777                         
778                         for (int k = 0; k < matrix.size(); k++) {       for (int j = 0; j < matrix.size(); j++) {       matrix[k].push_back(1.0);       }       }
779                         
780                         if (thisCombosSets.size() == 0)  { 
781                                 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine(); 
782                         }else{
783                                 
784                                 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; m->mothurOut(setNames[0] + "-" + setNames[1] + "\t");  }
785                                 else { out << "all" << '\t'; m->mothurOut("all\t"); }
786                                 
787                                 //fill submatrix
788                                 int rowCount = 0;
789                                 for (int j = 0; j < completeMatrix.size(); j++) {
790                                         
791                                         if (indexes.count(j) != 0) { //we want at least part of this row
792                                                 int columnCount = 0;
793                                                 for (int k = 0; k < completeMatrix[j].size(); k++) {
794                                                         
795                                                         if (indexes.count(k) != 0) { //we want this distance
796                                                                 matrix[rowCount][columnCount] = completeMatrix[j][k];
797                                                                 matrix[columnCount][rowCount] = completeMatrix[j][k];
798                                                                 columnCount++;
799                                                         }
800                                                 }
801                                                 rowCount++;
802                                         }
803                                 }
804                                 
805                                 
806                                 //calc anosim
807                                 calcAnosim(out, setNames.size(), thisCombosSets);
808                         }
809                 }
810                 
811                 out.close();
812                 
813                 
814                 return 0;
815                 
816         }
817         catch(exception& e) {
818                 m->errorOut(e, "AnosimCommand", "driver");
819                 exit(1);
820         }
821 }
822 //**********************************************************************************************************************
823 int AnosimCommand::calcAnosim(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
824         try {
825                 //rank distances
826                 vector<seqDist> rankMatrix = convertToRanks();
827                 
828                 double meanBetween, meanWithin, RValue, pValue;
829                 
830                 meanWithin = calcWithinBetween(rankMatrix, thisCombosLookupSets, meanBetween);
831                 
832                 int N = matrix.size();
833                 int div = (N * (N-1) / 4);
834         
835                 //calc RValue
836                 RValue = (meanBetween - meanWithin) / (double) div;
837                 
838                 //calc Pvalue
839                 int count = 0;
840                 for (int i = 0; i < iters; i++) {
841                         if (m->control_pressed) { break; }
842                         
843                         //randomly shuffle names to randomize the matrix
844                         vector<string> copyNames = thisCombosLookupSets;
845                         random_shuffle(copyNames.begin(), copyNames.end());
846                         
847                         meanWithin = calcWithinBetween(rankMatrix, copyNames, meanBetween);
848                         
849                         //calc RValue
850                         double randomRValue = (meanBetween - meanWithin) / (double) div;        
851                         
852                         if (randomRValue >= RValue) { count++; }
853                 }
854                 
855                 pValue = count / (float) iters;
856                 
857                 out << RValue << '\t' << pValue << endl;
858                 cout << RValue << '\t' << pValue << endl;
859                 m->mothurOutJustToLog(toString(RValue) + "\t" + toString(pValue) + "\n");
860                 
861                 return 0;
862         }
863         catch(exception& e) {
864                 m->errorOut(e, "AnosimCommand", "calcAnisom");
865                 exit(1);
866         }
867 }
868 //**********************************************************************************************************************
869 double AnosimCommand::calcWithinBetween(vector<seqDist>& thisMatrix, vector<string> thisCombosLookupSets, double& between) {
870         try {
871                 double within = 0.0;
872                 int count = 0;
873                 int count2 = 0;
874                 between = 0.0;
875                 
876                 for (int l = 0; l < thisMatrix.size(); l++) {
877                         //if you are from the same treatment 
878                         if (thisCombosLookupSets[thisMatrix[l].seq1] == thisCombosLookupSets[thisMatrix[l].seq2]) { 
879                                 within += thisMatrix[l].dist; //rank of this distance
880                                 count++;
881                         }else { //different treatments
882                                 between += thisMatrix[l].dist; //rank of this distance
883                                 count2++;
884                         }
885                 }
886                 
887                 within /= (float) count;
888                 between /= (float) count2;
889                 
890                 return within;
891         }
892         catch(exception& e) {
893                 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
894                 exit(1);
895         }
896 }
897 //**********************************************************************************************************************
898 vector<seqDist> AnosimCommand::convertToRanks() {
899         try {
900                 vector<seqDist> ranks;
901                 
902                 for (int i = 0; i < matrix.size(); i++) {
903                         for (int j = 0; j < i; j++) {
904                                 seqDist member(i, j, matrix[i][j]);
905                                 ranks.push_back(member);
906                         }
907                 }
908                 
909                 //sort distances
910                 sort(ranks.begin(), ranks.end(), compareSequenceDistance);      
911                 
912                 //find ranks of distances
913                 vector<seqDist*> ties;
914                 int rankTotal = 0;
915                 for (int j = 0; j < ranks.size(); j++) {
916                         rankTotal += (j+1);
917                         ties.push_back(&ranks[j]);
918                         
919                         if (j != (ranks.size()-1)) { // you are not the last so you can look ahead
920                                 if (ranks[j].dist != ranks[j+1].dist) { // you are done with ties, rank them and continue
921                                         
922                                         for (int k = 0; k < ties.size(); k++) {
923                                                 float thisrank = rankTotal / (float) ties.size();
924                                                 (*ties[k]).dist = thisrank;
925                                         }
926                                         ties.clear();
927                                         rankTotal = 0;
928                                 }
929                         }else { // you are the last one
930                                 
931                                 for (int k = 0; k < ties.size(); k++) {
932                                         float thisrank = rankTotal / (float) ties.size();
933                                         (*ties[k]).dist = thisrank;
934                                 }
935                         }
936                 }
937                 
938                 return ranks;
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "AnosimCommand", "convertToRanks");
942                 exit(1);
943         }
944 }
945 //**********************************************************************************************************************
946
947
948