5 * Created by westcott on 2/14/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "anosimcommand.h"
11 #include "sharedutilities.h"
12 #include "sharedsobscollectsummary.h"
13 #include "sharedchao1.h"
14 #include "sharedace.h"
15 #include "sharednseqs.h"
16 #include "sharedjabund.h"
17 #include "sharedsorabund.h"
18 #include "sharedjclass.h"
19 #include "sharedsorclass.h"
20 #include "sharedjest.h"
21 #include "sharedsorest.h"
22 #include "sharedthetayc.h"
23 #include "sharedthetan.h"
24 #include "sharedkstest.h"
25 #include "whittaker.h"
26 #include "sharedochiai.h"
27 #include "sharedanderbergs.h"
28 #include "sharedkulczynski.h"
29 #include "sharedkulczynskicody.h"
30 #include "sharedlennon.h"
31 #include "sharedmorisitahorn.h"
32 #include "sharedbraycurtis.h"
33 #include "sharedjackknife.h"
34 #include "whittaker.h"
37 #include "structeuclidean.h"
38 #include "structchord.h"
39 #include "hellinger.h"
40 #include "manhattan.h"
41 #include "structpearson.h"
44 #include "structkulczynski.h"
45 #include "structchi2.h"
46 #include "speciesprofile.h"
51 #include "memeuclidean.h"
52 #include "mempearson.h"
54 //**********************************************************************************************************************
55 vector<string> AnosimCommand::getValidParameters(){
57 string Array[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
58 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62 m->errorOut(e, "AnosimCommand", "getValidParameters");
66 //**********************************************************************************************************************
67 AnosimCommand::AnosimCommand(){
69 abort = true; calledHelp = true;
70 vector<string> tempOutNames;
71 outputTypes["anosim"] = tempOutNames;
74 m->errorOut(e, "AnosimCommand", "AnosimCommand");
78 //**********************************************************************************************************************
79 vector<string> AnosimCommand::getRequiredParameters(){
81 string Array[] = {"design"};
82 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
86 m->errorOut(e, "AnosimCommand", "getRequiredParameters");
90 //**********************************************************************************************************************
91 vector<string> AnosimCommand::getRequiredFiles(){
94 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
98 m->errorOut(e, "AnosimCommand", "getRequiredFiles");
102 //**********************************************************************************************************************
104 AnosimCommand::AnosimCommand(string option) {
106 globaldata = GlobalData::getInstance();
107 abort = false; calledHelp = false;
111 //allow user to run help
112 if(option == "help") { help(); abort = true; calledHelp = true; }
115 //valid paramters for this command
116 string AlignArray[] = {"groups","label","outputdir","iters","phylip","design","sets","processors","inputdir"};
117 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
124 //check to make sure all parameters are valid for command
125 map<string,string>::iterator it;
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["anosim"] = tempOutNames;
134 //if the user changes the output directory command factory will send this info to us in the output parameter
135 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
137 //if the user changes the input directory command factory will send this info to us in the output parameter
138 string inputDir = validParameter.validFile(parameters, "inputdir", false);
139 if (inputDir == "not found"){ inputDir = ""; }
142 it = parameters.find("design");
143 //user has given a template file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["design"] = inputDir + it->second; }
150 it = parameters.find("phylip");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["phylip"] = inputDir + it->second; }
159 phylipfile = validParameter.validFile(parameters, "phylip", true);
160 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
161 else if (phylipfile == "not found") { phylipfile = ""; }
162 else { globaldata->newRead(); format = "phylip"; globaldata->setPhylipFile(phylipfile); }
164 //check for required parameters
165 designfile = validParameter.validFile(parameters, "design", true);
166 if (designfile == "not open") { abort = true; }
167 else if (designfile == "not found") { designfile = ""; m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
169 //make sure the user has already run the read.otu command
170 if ((globaldata->getSharedFile() == "")) {
171 if ((phylipfile == "")) {
172 m->mothurOut("You must read a list and a group, a shared file or provide a distance matrix before you can use the anosim command."); m->mothurOutEndLine(); abort = true;
174 }else { sharedfile = globaldata->getSharedFile(); }
176 //use distance matrix if one is provided
177 if ((sharedfile != "") && (phylipfile != "")) { sharedfile = ""; }
179 //check for optional parameter and set defaults
180 // ...at some point should added some additional type checking...
181 label = validParameter.validFile(parameters, "label", false);
182 if (label == "not found") { label = ""; }
184 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
185 else { allLines = 1; }
188 //if the user has not specified any labels use the ones from read.otu
190 allLines = globaldata->allLines;
191 labels = globaldata->labels;
194 groups = validParameter.validFile(parameters, "groups", false);
195 if (groups == "not found") { groups = ""; }
197 m->splitAtDash(groups, Groups);
198 globaldata->Groups = Groups;
201 sets = validParameter.validFile(parameters, "sets", false);
202 if (sets == "not found") { sets = ""; pickedGroups = false; }
205 m->splitAtDash(sets, Sets);
209 string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
210 convert(temp, iters);
212 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; }
213 convert(temp, processors);
215 vector<string> Estimators;
216 calc = validParameter.validFile(parameters, "calc", false);
217 if (calc == "not found") { calc = "morisitahorn"; }
218 m->splitAtDash(calc, Estimators);
220 if (abort == false) {
222 ValidCalculators* validCalculator = new ValidCalculators();
224 for (int i=0; i<Estimators.size(); i++) {
225 if (validCalculator->isValidCalculator("treegroup", Estimators[i]) == true) {
226 if (Estimators[i] == "sharedsobs") {
227 calculators.push_back(new SharedSobsCS());
228 }else if (Estimators[i] == "sharedchao") {
229 calculators.push_back(new SharedChao1());
230 }else if (Estimators[i] == "sharedace") {
231 calculators.push_back(new SharedAce());
232 }else if (Estimators[i] == "jabund") {
233 calculators.push_back(new JAbund());
234 }else if (Estimators[i] == "sorabund") {
235 calculators.push_back(new SorAbund());
236 }else if (Estimators[i] == "jclass") {
237 calculators.push_back(new Jclass());
238 }else if (Estimators[i] == "sorclass") {
239 calculators.push_back(new SorClass());
240 }else if (Estimators[i] == "jest") {
241 calculators.push_back(new Jest());
242 }else if (Estimators[i] == "sorest") {
243 calculators.push_back(new SorEst());
244 }else if (Estimators[i] == "thetayc") {
245 calculators.push_back(new ThetaYC());
246 }else if (Estimators[i] == "thetan") {
247 calculators.push_back(new ThetaN());
248 }else if (Estimators[i] == "kstest") {
249 calculators.push_back(new KSTest());
250 }else if (Estimators[i] == "sharednseqs") {
251 calculators.push_back(new SharedNSeqs());
252 }else if (Estimators[i] == "ochiai") {
253 calculators.push_back(new Ochiai());
254 }else if (Estimators[i] == "anderberg") {
255 calculators.push_back(new Anderberg());
256 }else if (Estimators[i] == "kulczynski") {
257 calculators.push_back(new Kulczynski());
258 }else if (Estimators[i] == "kulczynskicody") {
259 calculators.push_back(new KulczynskiCody());
260 }else if (Estimators[i] == "lennon") {
261 calculators.push_back(new Lennon());
262 }else if (Estimators[i] == "morisitahorn") {
263 calculators.push_back(new MorHorn());
264 }else if (Estimators[i] == "braycurtis") {
265 calculators.push_back(new BrayCurtis());
266 }else if (Estimators[i] == "whittaker") {
267 calculators.push_back(new Whittaker());
268 }else if (Estimators[i] == "odum") {
269 calculators.push_back(new Odum());
270 }else if (Estimators[i] == "canberra") {
271 calculators.push_back(new Canberra());
272 }else if (Estimators[i] == "structeuclidean") {
273 calculators.push_back(new StructEuclidean());
274 }else if (Estimators[i] == "structchord") {
275 calculators.push_back(new StructChord());
276 }else if (Estimators[i] == "hellinger") {
277 calculators.push_back(new Hellinger());
278 }else if (Estimators[i] == "manhattan") {
279 calculators.push_back(new Manhattan());
280 }else if (Estimators[i] == "structpearson") {
281 calculators.push_back(new StructPearson());
282 }else if (Estimators[i] == "soergel") {
283 calculators.push_back(new Soergel());
284 }else if (Estimators[i] == "spearman") {
285 calculators.push_back(new Spearman());
286 }else if (Estimators[i] == "structkulczynski") {
287 calculators.push_back(new StructKulczynski());
288 }else if (Estimators[i] == "speciesprofile") {
289 calculators.push_back(new SpeciesProfile());
290 }else if (Estimators[i] == "hamming") {
291 calculators.push_back(new Hamming());
292 }else if (Estimators[i] == "structchi2") {
293 calculators.push_back(new StructChi2());
294 }else if (Estimators[i] == "gower") {
295 calculators.push_back(new Gower());
296 }else if (Estimators[i] == "memchi2") {
297 calculators.push_back(new MemChi2());
298 }else if (Estimators[i] == "memchord") {
299 calculators.push_back(new MemChord());
300 }else if (Estimators[i] == "memeuclidean") {
301 calculators.push_back(new MemEuclidean());
302 }else if (Estimators[i] == "mempearson") {
303 calculators.push_back(new MemPearson());
311 catch(exception& e) {
312 m->errorOut(e, "AnosimCommand", "AnosimCommand");
317 //**********************************************************************************************************************
319 void AnosimCommand::help(){
321 m->mothurOut("The anosim command can only be executed after a successful read.otu command of a list and group or shared file, or by providing a phylip formatted distance matrix.\n");
322 m->mothurOut("The anosim command outputs a .anosim file. \n");
323 m->mothurOut("The anosim command parameters are phylip, iters, groups, label, design, sets and processors. The design parameter is required.\n");
324 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running anosim. It is required. \n");
325 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
326 m->mothurOut("The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile. To run the pairwise comparisons of all sets use sets=all.\n");
327 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
328 m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes. groups=all will find all pairwise comparisons. \n");
329 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
330 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
331 m->mothurOut("The anosim command should be in the following format: anosim(design=yourDesignFile).\n");
332 m->mothurOut("Example anosim(design=temp.design, groups=A-B-C).\n");
333 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
336 catch(exception& e) {
337 m->errorOut(e, "AnosimCommand", "help");
342 //**********************************************************************************************************************
344 AnosimCommand::~AnosimCommand(){}
346 //**********************************************************************************************************************
348 int AnosimCommand::execute(){
351 if (abort == true) { if (calledHelp) { return 0; } return 2; }
354 designMap = new GroupMap(designfile);
355 designMap->readDesignMap();
357 //make sure sets are all in designMap
358 SharedUtil* util = new SharedUtil();
359 util->setGroups(Sets, designMap->namesOfGroups);
362 //if the user pickedGroups or set sets=all, then we want to figure out how to split the pairwise comparison between processors
363 int numGroups = Sets.size();
365 for (int a=0; a<numGroups; a++) {
366 for (int l = 0; l < a; l++) {
367 vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
368 namesOfGroupCombos.push_back(groups);
371 }else { //one giant compare
372 vector<string> groups;
373 for (int a=0; a<numGroups; a++) { groups.push_back(Sets[a]); }
374 namesOfGroupCombos.push_back(groups);
378 if (numGroups == 2) { processors = 1; }
379 else if (numGroups < 2) { m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
383 int numPairs = namesOfGroupCombos.size();
384 int numPairsPerProcessor = numPairs / processors;
386 for (int i = 0; i < processors; i++) {
387 int startPos = i * numPairsPerProcessor;
388 if(i == processors - 1){
389 numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
391 lines.push_back(linePair(startPos, numPairsPerProcessor));
396 if (sharedfile != "") { //create distance matrix for each label
398 if (outputDir == "") { outputDir = m->hasPath(sharedfile); }
400 //for each calculator
401 for(int i = 0 ; i < calculators.size(); i++) {
403 //create a new filename
405 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim";
406 m->openOutputFile(outputFile, out);
407 outputNames.push_back(outputFile); outputTypes["anosim"].push_back(outputFile);
410 out << "label\tgroupsCompared\tRValue\tpValue" << endl;
411 m->mothurOut("\nlabel\tgroupsCompared\tRValue\tpValue"); m->mothurOutEndLine();
415 InputData input(sharedfile, "sharedfile");
416 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
417 string lastLabel = lookup[0]->getLabel();
419 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
420 set<string> processedLabels;
421 set<string> userLabels = labels;
423 //sanity check between shared file groups and design file groups
424 for (int i = 0; i < lookup.size(); i++) {
425 string group = designMap->getGroup(lookup[i]->getGroup());
427 if (group == "not found") { m->control_pressed = true; m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct."); m->mothurOutEndLine(); }
430 //as long as you are not at the end of the file or done wih the lines you want
431 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
433 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
435 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
437 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
440 processedLabels.insert(lookup[0]->getLabel());
441 userLabels.erase(lookup[0]->getLabel());
444 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
445 string saveLabel = lookup[0]->getLabel();
447 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
448 lookup = input.getSharedRAbundVectors(lastLabel);
449 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
453 processedLabels.insert(lookup[0]->getLabel());
454 userLabels.erase(lookup[0]->getLabel());
456 //restore real lastlabel to save below
457 lookup[0]->setLabel(saveLabel);
460 lastLabel = lookup[0]->getLabel();
461 //prevent memory leak
462 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
464 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
466 //get next line to process
467 lookup = input.getSharedRAbundVectors();
470 if (m->control_pressed) { globaldata->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
472 //output error messages about any remaining user labels
473 set<string>::iterator it;
474 bool needToRun = false;
475 for (it = userLabels.begin(); it != userLabels.end(); it++) {
476 m->mothurOut("Your file does not include the label " + *it);
477 if (processedLabels.count(lastLabel) != 1) {
478 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
481 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
485 //run last label if you need to
486 if (needToRun == true) {
487 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
488 lookup = input.getSharedRAbundVectors(lastLabel);
490 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
494 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
497 //reset groups parameter
498 globaldata->Groups.clear();
501 }else { //user provided distance matrix
503 if (outputDir == "") { outputDir = m->hasPath(phylipfile); }
505 //create a new filename
507 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "anosim";
508 m->openOutputFile(outputFile, out);
509 outputNames.push_back(outputFile); outputTypes["anosim"].push_back(outputFile);
512 out << "groupsCompared\tRValue\tpValue" << endl;
513 m->mothurOut("\ngroupsCompared\tRValue\tpValue"); m->mothurOutEndLine();
516 ReadPhylipVector readMatrix(phylipfile);
517 vector< vector<double> > completeMatrix;
518 vector<string> names = readMatrix.read(completeMatrix);
520 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
522 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
525 vector<int> processIDS;
527 //loop through and create all the processes you want
528 while (process != processors) {
532 processIDS.push_back(pid);
535 driver(lines[process].start, lines[process].num, names, toString(getpid()), completeMatrix);
538 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
539 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
545 driver(lines[0].start, lines[0].num, names, "", completeMatrix);
547 //force parent to wait until all the processes are done
548 for (int i=0;i<(processors-1);i++) {
549 int temp = processIDS[i];
554 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "anosim";
555 for (int j = 0; j < processIDS.size(); j++) {
556 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
557 remove((outputFile + toString(processIDS[j])).c_str());
562 driver(0, namesOfGroupCombos.size(), names, "", completeMatrix);
569 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
571 m->mothurOutEndLine();
572 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
573 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
574 m->mothurOutEndLine();
578 catch(exception& e) {
579 m->errorOut(e, "AnosimCommand", "execute");
583 //**********************************************************************************************************************
585 int AnosimCommand::process(vector<SharedRAbundVector*> thisLookup) {
588 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
590 driver(0, namesOfGroupCombos.size(), thisLookup, "");
593 vector<int> processIDS;
595 //loop through and create all the processes you want
596 while (process != processors) {
600 processIDS.push_back(pid);
603 driver(lines[process].start, lines[process].num, thisLookup, toString(getpid()));
606 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
607 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
613 driver(lines[0].start, lines[0].num, thisLookup, "");
615 //force parent to wait until all the processes are done
616 for (int i=0;i<(processors-1);i++) {
617 int temp = processIDS[i];
622 for(int i = 0 ; i < calculators.size(); i++) {
623 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim";
625 for (int j = 0; j < processIDS.size(); j++) {
626 m->appendFiles((outputFile + toString(processIDS[j])), outputFile);
627 remove((outputFile + toString(processIDS[j])).c_str());
632 driver(0, namesOfGroupCombos.size(), thisLookUp, "");
637 catch(exception& e) {
638 m->errorOut(e, "AnosimCommand", "process");
642 //**********************************************************************************************************************
644 int AnosimCommand::driver(int start, int num, vector<SharedRAbundVector*> thisLookup, string pidValue) {
646 vector<SharedRAbundVector*> subset;
649 //for each calculator
650 for(int i = 0 ; i < calculators.size(); i++) {
652 //create a new filename
654 string outputFile = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + calculators[i]->getName() + ".anosim" + pidValue;
655 m->openOutputFileAppend(outputFile, out);
656 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
657 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
660 for (int c = start; c < (start+num); c++) {
662 if (m->control_pressed) { out.close(); return 0; }
665 vector<string> setNames;
666 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
668 vector<SharedRAbundVector*> thisCombosLookup;
669 vector<string> thisCombosLookupSets; //what set each sharedRabund is from to be used when calculating SSWithin
670 for (int k = 0; k < thisLookup.size(); k++) {
671 string thisGroup = thisLookup[k]->getGroup();
673 //is this group for a set we want to compare??
674 if (m->inUsersGroups(designMap->getGroup(thisGroup), setNames)) {
675 thisCombosLookup.push_back(thisLookup[k]);
676 thisCombosLookupSets.push_back(designMap->getGroup(thisGroup));
681 int numGroups = thisCombosLookup.size();
683 //calc the distance matrix
685 matrix.resize(numGroups);
686 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
688 if (thisCombosLookup.size() == 0) {
689 m->mothurOut("[ERROR]: Missing shared info for sets. Skipping comparison."); m->mothurOutEndLine();
692 out << thisLookup[0]->getLabel() << '\t';
693 m->mothurOut(thisLookup[0]->getLabel() + "\t");
694 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; m->mothurOut(setNames[0] + "-" + setNames[1] + "\t"); }
695 else { out << "all" << '\t'; m->mothurOut("all\t"); }
697 for (int k = 0; k < thisCombosLookup.size(); k++) {
698 for (int l = k; l < thisCombosLookup.size(); l++) {
700 if (m->control_pressed) { out.close(); return 0; }
702 if (k != l) { //we dont need to similiarity of a groups to itself
703 //get estimated similarity between 2 groups
704 subset.clear(); //clear out old pair of sharedrabunds
705 //add new pair of sharedrabunds
706 subset.push_back(thisCombosLookup[k]); subset.push_back(thisCombosLookup[l]);
708 //if this calc needs all groups to calculate the pair load all groups
709 if (calculators[i]->getNeedsAll()) {
710 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
711 for (int w = 0; w < thisCombosLookup.size(); w++) {
712 if ((w != k) && (w != l)) { subset.push_back(thisCombosLookup[w]); }
716 data = calculators[i]->getValues(subset); //saves the calculator outputs
718 //save values in similarity matrix
719 matrix[k][l] = 1.0 - data[0];
720 matrix[l][k] = 1.0 - data[0];
726 calcAnosim(out, setNames.size(), thisCombosLookupSets);
736 catch(exception& e) {
737 m->errorOut(e, "AnosimCommand", "driver");
741 //**********************************************************************************************************************
743 int AnosimCommand::driver(int start, int num, vector<string> names, string pidValue, vector< vector<double> >& completeMatrix) {
746 //create a new filename
748 string outputFile = outputDir + m->getRootName(m->getSimpleName(phylipfile)) + "anosim" + pidValue;
749 m->openOutputFileAppend(outputFile, out);
750 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
751 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
754 for (int c = start; c < (start+num); c++) {
756 if (m->control_pressed) { out.close(); return 0; }
759 vector<string> setNames;
760 for (int j = 0; j < namesOfGroupCombos[c].size(); j++) { setNames.push_back(namesOfGroupCombos[c][j]); }
762 vector<string> thisCombosSets; //what set each line in the distance matrix is from to be used when calculating SSWithin
764 for (int k = 0; k < names.size(); k++) {
765 //is this group for a set we want to compare??
766 if (m->inUsersGroups(designMap->getGroup(names[k]), setNames)) {
767 thisCombosSets.push_back(designMap->getGroup(names[k]));
768 indexes.insert(k); //save indexes of valid rows in matrix for submatrix
772 int numGroups = thisCombosSets.size();
774 //calc the distance matrix
776 matrix.resize(numGroups);
778 for (int k = 0; k < matrix.size(); k++) { for (int j = 0; j < matrix.size(); j++) { matrix[k].push_back(1.0); } }
780 if (thisCombosSets.size() == 0) {
781 m->mothurOut("[ERROR]: Missing distance info for sets. Skipping comparison."); m->mothurOutEndLine();
784 if (setNames.size() == 2) { out << setNames[0] << "-" << setNames[1] << '\t'; m->mothurOut(setNames[0] + "-" + setNames[1] + "\t"); }
785 else { out << "all" << '\t'; m->mothurOut("all\t"); }
789 for (int j = 0; j < completeMatrix.size(); j++) {
791 if (indexes.count(j) != 0) { //we want at least part of this row
793 for (int k = 0; k < completeMatrix[j].size(); k++) {
795 if (indexes.count(k) != 0) { //we want this distance
796 matrix[rowCount][columnCount] = completeMatrix[j][k];
797 matrix[columnCount][rowCount] = completeMatrix[j][k];
807 calcAnosim(out, setNames.size(), thisCombosSets);
817 catch(exception& e) {
818 m->errorOut(e, "AnosimCommand", "driver");
822 //**********************************************************************************************************************
823 int AnosimCommand::calcAnosim(ofstream& out, int numTreatments, vector<string> thisCombosLookupSets) {
826 vector<seqDist> rankMatrix = convertToRanks();
828 double meanBetween, meanWithin, RValue, pValue;
830 meanWithin = calcWithinBetween(rankMatrix, thisCombosLookupSets, meanBetween);
832 int N = matrix.size();
833 int div = (N * (N-1) / 4);
836 RValue = (meanBetween - meanWithin) / (double) div;
840 for (int i = 0; i < iters; i++) {
841 if (m->control_pressed) { break; }
843 //randomly shuffle names to randomize the matrix
844 vector<string> copyNames = thisCombosLookupSets;
845 random_shuffle(copyNames.begin(), copyNames.end());
847 meanWithin = calcWithinBetween(rankMatrix, copyNames, meanBetween);
850 double randomRValue = (meanBetween - meanWithin) / (double) div;
852 if (randomRValue >= RValue) { count++; }
855 pValue = count / (float) iters;
857 out << RValue << '\t' << pValue << endl;
858 cout << RValue << '\t' << pValue << endl;
859 m->mothurOutJustToLog(toString(RValue) + "\t" + toString(pValue) + "\n");
863 catch(exception& e) {
864 m->errorOut(e, "AnosimCommand", "calcAnisom");
868 //**********************************************************************************************************************
869 double AnosimCommand::calcWithinBetween(vector<seqDist>& thisMatrix, vector<string> thisCombosLookupSets, double& between) {
876 for (int l = 0; l < thisMatrix.size(); l++) {
877 //if you are from the same treatment
878 if (thisCombosLookupSets[thisMatrix[l].seq1] == thisCombosLookupSets[thisMatrix[l].seq2]) {
879 within += thisMatrix[l].dist; //rank of this distance
881 }else { //different treatments
882 between += thisMatrix[l].dist; //rank of this distance
887 within /= (float) count;
888 between /= (float) count2;
892 catch(exception& e) {
893 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
897 //**********************************************************************************************************************
898 vector<seqDist> AnosimCommand::convertToRanks() {
900 vector<seqDist> ranks;
902 for (int i = 0; i < matrix.size(); i++) {
903 for (int j = 0; j < i; j++) {
904 seqDist member(i, j, matrix[i][j]);
905 ranks.push_back(member);
910 sort(ranks.begin(), ranks.end(), compareSequenceDistance);
912 //find ranks of distances
913 vector<seqDist*> ties;
915 for (int j = 0; j < ranks.size(); j++) {
917 ties.push_back(&ranks[j]);
919 if (j != (ranks.size()-1)) { // you are not the last so you can look ahead
920 if (ranks[j].dist != ranks[j+1].dist) { // you are done with ties, rank them and continue
922 for (int k = 0; k < ties.size(); k++) {
923 float thisrank = rankTotal / (float) ties.size();
924 (*ties[k]).dist = thisrank;
929 }else { // you are the last one
931 for (int k = 0; k < ties.size(); k++) {
932 float thisrank = rankTotal / (float) ties.size();
933 (*ties[k]).dist = thisrank;
940 catch(exception& e) {
941 m->errorOut(e, "AnosimCommand", "convertToRanks");
945 //**********************************************************************************************************************