5 * Created by westcott on 2/14/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "anosimcommand.h"
11 #include "inputdata.h"
12 #include "readphylipvector.h"
14 //**********************************************************************************************************************
15 vector<string> AnosimCommand::setParameters(){
17 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","anosim",false,true,true); parameters.push_back(pdesign);
18 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","anosim",false,true,true); parameters.push_back(pphylip);
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
20 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "AnosimCommand", "setParameters");
33 //**********************************************************************************************************************
34 string AnosimCommand::getHelpString(){
36 string helpString = "";
37 helpString += "Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure. _Australian Journal of Ecology_ 18, 117-143.\n";
38 helpString += "The anosim command outputs a .anosim file. \n";
39 helpString += "The anosim command parameters are phylip, iters, and alpha. The phylip and design parameters are required, unless you have valid current files.\n";
40 helpString += "The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n";
41 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n";
43 helpString += "The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n";
44 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
48 m->errorOut(e, "AnosimCommand", "getHelpString");
52 //**********************************************************************************************************************
53 string AnosimCommand::getOutputPattern(string type) {
57 if (type == "anosim") { pattern = "[filename],anosim"; } //makes file like: amazon.align
58 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
63 m->errorOut(e, "AnosimCommand", "getOutputPattern");
67 //**********************************************************************************************************************
68 AnosimCommand::AnosimCommand(){
70 abort = true; calledHelp = true;
72 vector<string> tempOutNames;
73 outputTypes["anosim"] = tempOutNames;
76 m->errorOut(e, "AnosimCommand", "AnosimCommand");
80 //**********************************************************************************************************************
82 AnosimCommand::AnosimCommand(string option) {
84 abort = false; calledHelp = false;
86 //allow user to run help
87 if(option == "help") { help(); abort = true; calledHelp = true; }
88 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91 vector<string> myArray = setParameters();
93 OptionParser parser(option);
94 map<string,string> parameters = parser.getParameters();
96 ValidParameters validParameter;
98 //check to make sure all parameters are valid for command
99 map<string,string>::iterator it;
100 for (it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 //initialize outputTypes
105 vector<string> tempOutNames;
106 outputTypes["anosim"] = tempOutNames;
108 //if the user changes the output directory command factory will send this info to us in the output parameter
109 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
111 //if the user changes the input directory command factory will send this info to us in the output parameter
112 string inputDir = validParameter.validFile(parameters, "inputdir", false);
113 if (inputDir == "not found"){ inputDir = ""; }
116 it = parameters.find("design");
117 //user has given a template file
118 if(it != parameters.end()){
119 path = m->hasPath(it->second);
120 //if the user has not given a path then, add inputdir. else leave path alone.
121 if (path == "") { parameters["design"] = inputDir + it->second; }
124 it = parameters.find("phylip");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["phylip"] = inputDir + it->second; }
133 phylipFileName = validParameter.validFile(parameters, "phylip", true);
134 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
135 else if (phylipFileName == "not found") {
136 //if there is a current phylip file, use it
137 phylipFileName = m->getPhylipFile();
138 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
139 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
141 }else { m->setPhylipFile(phylipFileName); }
143 //check for required parameters
144 designFileName = validParameter.validFile(parameters, "design", true);
145 if (designFileName == "not open") { designFileName = ""; abort = true; }
146 else if (designFileName == "not found") {
147 //if there is a current design file, use it
148 designFileName = m->getDesignFile();
149 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
151 }else { m->setDesignFile(designFileName); }
153 string temp = validParameter.validFile(parameters, "iters", false);
154 if (temp == "not found") { temp = "1000"; }
155 m->mothurConvert(temp, iters);
157 temp = validParameter.validFile(parameters, "alpha", false);
158 if (temp == "not found") { temp = "0.05"; }
159 m->mothurConvert(temp, experimentwiseAlpha);
163 catch(exception& e) {
164 m->errorOut(e, "AnosimCommand", "AnosimCommand");
168 //**********************************************************************************************************************
169 int AnosimCommand::execute(){
172 if (abort == true) { if (calledHelp) { return 0; } return 2; }
175 designMap = new GroupMap(designFileName);
176 designMap->readDesignMap();
178 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
180 //read in distance matrix and square it
181 ReadPhylipVector readMatrix(phylipFileName);
182 vector<string> sampleNames = readMatrix.read(distanceMatrix);
184 for(int i=0;i<distanceMatrix.size();i++){
185 for(int j=0;j<i;j++){
186 distanceMatrix[i][j] *= distanceMatrix[i][j];
190 //link designMap to rows/columns in distance matrix
191 map<string, vector<int> > origGroupSampleMap;
192 for(int i=0;i<sampleNames.size();i++){
193 string group = designMap->getGroup(sampleNames[i]);
195 if (group == "not found") {
196 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
197 }else { origGroupSampleMap[group].push_back(i); }
199 int numGroups = origGroupSampleMap.size();
201 if (m->control_pressed) { delete designMap; return 0; }
203 //create a new filename
205 map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
206 string ANOSIMFileName = getOutputFileName("anosim", variables);
208 m->openOutputFile(ANOSIMFileName, ANOSIMFile);
209 outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
210 m->mothurOut("\ncomparison\tR-value\tP-value\n");
211 ANOSIMFile << "comparison\tR-value\tP-value\n";
214 double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha);
217 if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){
219 int numCombos = numGroups * (numGroups-1) / 2;
220 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
222 for(map<string, vector<int> >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
223 map<string, vector<int> >::iterator itB = itA;
225 for(itB;itB!=origGroupSampleMap.end();itB++){
227 map<string, vector<int> > subGroupSampleMap;
229 subGroupSampleMap[itA->first] = itA->second; string groupA = itA->first;
230 subGroupSampleMap[itB->first] = itB->second; string groupB = itB->first;
232 vector<int> subIndices;
233 for(map<string, vector<int> >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){
234 subIndices.insert(subIndices.end(), it->second.begin(), it->second.end());
236 int subNumSamples = subIndices.size();
238 sort(subIndices.begin(), subIndices.end());
240 vector<vector<double> > subDistMatrix(distanceMatrix.size());
241 for(int i=0;i<distanceMatrix.size();i++){
242 subDistMatrix[i].assign(distanceMatrix.size(), -1);
245 for(int i=0;i<subNumSamples;i++){
246 for(int j=0;j<i;j++){
247 subDistMatrix[subIndices[i]][subIndices[j]] = distanceMatrix[subIndices[i]][subIndices[j]];
251 runANOSIM(ANOSIMFile, subDistMatrix, subGroupSampleMap, pairwiseAlpha);
256 m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
257 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
260 m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
262 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
268 m->mothurOutEndLine();
269 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
270 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
271 m->mothurOutEndLine();
275 catch(exception& e) {
276 m->errorOut(e, "AnosimCommand", "execute");
280 //**********************************************************************************************************************
282 double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector<vector<double> > dMatrix, map<string, vector<int> > groupSampleMap, double alpha) {
286 vector<vector<double> > rankMatrix = convertToRanks(dMatrix);
287 double RValue = calcR(rankMatrix, groupSampleMap);
290 for(int i=0;i<iters;i++){
291 map<string, vector<int> > randGroupSampleMap = getRandomizedGroups(groupSampleMap);
292 double RValueRand = calcR(rankMatrix, randGroupSampleMap);
293 if(RValue <= RValueRand){ pCount++; }
296 double pValue = (double)pCount / (double) iters;
298 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
299 else { pString = toString(pValue); }
302 map<string, vector<int> >::iterator it=groupSampleMap.begin();
303 m->mothurOut(it->first);
304 ANOSIMFile << it->first;
306 for(it;it!=groupSampleMap.end();it++){
307 m->mothurOut('-' + it->first);
308 ANOSIMFile << '-' << it->first;
311 m->mothurOut('\t' + toString(RValue) + '\t' + pString);
312 ANOSIMFile << '\t' << RValue << '\t' << pString;
319 m->mothurOutEndLine();
323 catch(exception& e) {
324 m->errorOut(e, "AnosimCommand", "calcAnisom");
329 //**********************************************************************************************************************
331 double AnosimCommand::calcR(vector<vector<double> > rankMatrix, map<string, vector<int> > groupSampleMap){
335 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
336 numSamples += it->second.size();
341 int numWithinComps = 0;
343 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
344 vector<int> indices = it->second;
345 for(int i=0;i<indices.size();i++){
346 for(int j=0;j<i;j++){
347 if(indices[i] > indices[j]) { within += rankMatrix[indices[i]][indices[j]]; }
348 else { within += rankMatrix[indices[j]][indices[i]]; }
354 within /= (float) numWithinComps;
356 double between = 0.0;
357 int numBetweenComps = 0;
359 map<string, vector<int> >::iterator itB;
361 for(map<string, vector<int> >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){
363 for(int i=0;i<itA->second.size();i++){
364 int A = itA->second[i];
365 map<string, vector<int> >::iterator itB = itA;
367 for(itB;itB!=groupSampleMap.end();itB++){
368 for(int j=0;j<itB->second.size();j++){
369 int B = itB->second[j];
370 if(A>B) { between += rankMatrix[A][B]; }
371 else { between += rankMatrix[B][A]; }
380 between /= (float) numBetweenComps;
382 double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0);
386 catch(exception& e) {
387 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
392 //**********************************************************************************************************************
394 vector<vector<double> > AnosimCommand::convertToRanks(vector<vector<double> > dist) {
396 vector<seqDist> cells;
397 vector<vector<double> > ranks = dist;
399 for (int i = 0; i < dist.size(); i++) {
400 for (int j = 0; j < i; j++) {
401 if(dist[i][j] != -1){
402 seqDist member(i, j, dist[i][j]);
403 cells.push_back(member);
410 sort(cells.begin(), cells.end(), compareSequenceDistance);
412 //find ranks of distances
415 for(int i=0;i<cells.size()-1;i++){
419 while(dist[cells[index].seq1][cells[index].seq2] == dist[cells[index+1].seq1][cells[index+1].seq2]){
421 indexSum += index + 1;
425 ranks[cells[i].seq1][cells[i].seq2] = i+1;
428 double aveIndex = (double)indexSum / (double)(index - i + 1);
429 for(int j=i;j<=index;j++){
430 ranks[cells[j].seq1][cells[j].seq2] = aveIndex;
436 if(indexSum == cells.size() - 1){
437 ranks[cells[cells.size()-1].seq1][cells[cells.size()-1].seq2] = indexSum + 1;
442 catch(exception& e) {
443 m->errorOut(e, "AnosimCommand", "convertToRanks");
448 //**********************************************************************************************************************
450 map<string, vector<int> > AnosimCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
452 vector<int> sampleIndices;
453 vector<int> samplesPerGroup;
455 map<string, vector<int> >::iterator it;
456 for(it=origMapping.begin();it!=origMapping.end();it++){
457 vector<int> indices = it->second;
458 samplesPerGroup.push_back(indices.size());
459 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
462 random_shuffle(sampleIndices.begin(), sampleIndices.end());
465 map<string, vector<int> > randomizedGroups = origMapping;
466 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
467 for(int i=0;i<it->second.size();i++){
468 it->second[i] = sampleIndices[index++];
472 return randomizedGroups;
474 catch (exception& e) {
475 m->errorOut(e, "AnosimCommand", "randomizeGroups");
480 //**********************************************************************************************************************