5 * Created by westcott on 2/14/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "anosimcommand.h"
11 #include "inputdata.h"
12 #include "readphylipvector.h"
14 //**********************************************************************************************************************
// Builds the list of parameter names the anosim command accepts:
// outputdir, iters, phylip, design, alpha and inputdir.
// NOTE(review): the same list is repeated in AnosimCommand(string option); keep the two in sync.
15 vector<string> AnosimCommand::getValidParameters(){
17 string Array[] = {"outputdir","iters","phylip","design", "alpha","inputdir"};
// sizeof(Array)/sizeof(string) is the element count, so the vector receives every entry of Array.
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Errors are reported through m->errorOut, tagged with the class and method name.
22 m->errorOut(e, "AnosimCommand", "getValidParameters");
26 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help called, and
// registers an empty list of output files under the "anosim" output type.
27 AnosimCommand::AnosimCommand(){
29 abort = true; calledHelp = true;
// Empty placeholder vector so that outputTypes has an "anosim" entry.
30 vector<string> tempOutNames;
31 outputTypes["anosim"] = tempOutNames;
// Errors are reported through m->errorOut, tagged with the class and method name.
34 m->errorOut(e, "AnosimCommand", "AnosimCommand");
38 //**********************************************************************************************************************
// Builds the list of parameters that must be supplied; only "design" is listed.
// NOTE(review): the help text states that both phylip and design are required,
// but "phylip" is not part of this list - confirm which is intended.
39 vector<string> AnosimCommand::getRequiredParameters(){
41 string Array[] = {"design"};
// sizeof(Array)/sizeof(string) is the element count (1 here).
42 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Errors are reported through m->errorOut, tagged with the class and method name.
46 m->errorOut(e, "AnosimCommand", "getRequiredParameters");
50 //**********************************************************************************************************************
// Builds the list of files the command requires, as a vector copied from Array.
// NOTE(review): the contents of Array cannot be confirmed from the lines shown here.
51 vector<string> AnosimCommand::getRequiredFiles(){
54 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Errors are reported through m->errorOut, tagged with the class and method name.
58 m->errorOut(e, "AnosimCommand", "getRequiredFiles");
62 //**********************************************************************************************************************
// Constructs the command from a user option string.
// Steps: handle "help", parse the options, check every parameter name against
// the valid list, resolve the phylip and design file names (prefixing inputdir
// to bare file names), then read iters (default 1000) and alpha (default 0.05).
// abort is set when help was requested, a parameter name is invalid, or a
// required file cannot be opened.
64 AnosimCommand::AnosimCommand(string option) {
66 abort = false; calledHelp = false;
68 //allow user to run help
69 if(option == "help") { help(); abort = true; calledHelp = true; }
72 //valid parameters for this command
// NOTE(review): duplicates the list built in getValidParameters(); keep the two in sync.
73 string AlignArray[] = {"outputdir","iters","phylip","design", "alpha","inputdir"};
74 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Split the option string into a name -> value map.
76 OptionParser parser(option);
77 map<string,string> parameters = parser.getParameters();
79 ValidParameters validParameter;
81 //check to make sure all parameters are valid for command
82 map<string,string>::iterator it;
83 for (it = parameters.begin(); it != parameters.end(); it++) {
// Any unknown parameter name aborts the command; the loop keeps going so every parameter is checked.
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //initialize outputTypes
88 vector<string> tempOutNames;
89 outputTypes["anosim"] = tempOutNames;
91 //if the user changes the output directory command factory will send this info to us in the output parameter
// "not found" is the sentinel returned when the parameter was not supplied; it is mapped to "".
92 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
94 //if the user changes the input directory command factory will send this info to us in the output parameter
95 string inputDir = validParameter.validFile(parameters, "inputdir", false);
96 if (inputDir == "not found"){ inputDir = ""; }
99 it = parameters.find("design");
100 //user has given a design file
101 if(it != parameters.end()){
// m->hasPath returns "" when the value is a bare file name without a directory part.
102 path = m->hasPath(it->second);
103 //if the user has not given a path then, add inputdir. else leave path alone.
104 if (path == "") { parameters["design"] = inputDir + it->second; }
107 it = parameters.find("phylip");
108 //user has given a phylip file
109 if(it != parameters.end()){
110 path = m->hasPath(it->second);
111 //if the user has not given a path then, add inputdir. else leave path alone.
112 if (path == "") { parameters["phylip"] = inputDir + it->second; }
// Resolve the phylip distance file: "not open" aborts, "not found" only clears the name.
116 phylipFileName = validParameter.validFile(parameters, "phylip", true);
117 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
118 else if (phylipFileName == "not found") { phylipFileName = ""; }
// NOTE(review): this branch tests designFileName, which is not assigned until the
// "design" lookup further down, while the message it guards is about the phylip
// file. It looks like a copy/paste slip; a missing phylip file is handled by the
// branch above, which does not set abort. Confirm the intended check.
119 else if (designFileName == "not found") {
121 m->mothurOut("You must provide an phylip file.");
122 m->mothurOutEndLine();
126 //check for required parameters
127 designFileName = validParameter.validFile(parameters, "design", true);
128 if (designFileName == "not open") { abort = true; }
129 else if (designFileName == "not found") {
131 m->mothurOut("You must provide an design file.");
132 m->mothurOutEndLine();
// Number of randomizations used for the P value; defaults to 1000 when not supplied.
136 string temp = validParameter.validFile(parameters, "iters", false);
137 if (temp == "not found") { temp = "1000"; }
138 convert(temp, iters);
// Experiment-wise significance level; defaults to 0.05 when not supplied.
140 temp = validParameter.validFile(parameters, "alpha", false);
141 if (temp == "not found") { temp = "0.05"; }
142 convert(temp, experimentwiseAlpha);
146 catch(exception& e) {
147 m->errorOut(e, "AnosimCommand", "AnosimCommand");
152 //**********************************************************************************************************************
// Prints the usage text for the anosim command through m->mothurOut.
// NOTE(review): the text lists the parameters as "phylip, iters, and alpha" even
// though design is also accepted and described, and it never explains alpha
// (the constructor defaults it to 0.05). The strings are runtime output and are
// left unchanged here.
154 void AnosimCommand::help(){
156 m->mothurOut("Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure. _Australian Journal of Ecology_ 18, 117-143.\n");
157 m->mothurOut("The anosim command outputs a .anosim file. \n");
158 m->mothurOut("The anosim command parameters are phylip, iters, and alpha. The phylip and design parameters are required.\n");
159 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n");
160 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
161 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
162 m->mothurOut("The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n");
163 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
166 catch(exception& e) {
167 m->errorOut(e, "AnosimCommand", "help");
172 //**********************************************************************************************************************
// Destructor; intentionally empty.
// NOTE(review): execute() allocates designMap with new; nothing is released here,
// so confirm that it is deleted elsewhere.
174 AnosimCommand::~AnosimCommand(){}
176 //**********************************************************************************************************************
// Runs the analysis.
// Loads the design file, reads the phylip distance matrix and squares its
// lower-triangle entries, groups the matrix row indices by design group, and
// writes the overall result to "<phylip root>anosim". When the overall P value
// is <= alpha and there are more than two groups, each pair of groups is tested
// against alpha divided by the number of pairs (reported as Bonferroni).
178 int AnosimCommand::execute(){
// A command that was aborted returns 0 if the user only asked for help, otherwise 2.
181 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// NOTE(review): raw new with no delete in the lines shown; confirm designMap is released.
184 designMap = new GroupMap(designFileName);
185 designMap->readDesignMap();
// With no explicit output directory, write next to the phylip file.
187 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
189 //read in distance matrix and square it
190 ReadPhylipVector readMatrix(phylipFileName);
191 vector<string> sampleNames = readMatrix.read(distanceMatrix);
// Only entries with j < i (below the diagonal) are squared, in place.
193 for(int i=0;i<distanceMatrix.size();i++){
194 for(int j=0;j<i;j++){
195 distanceMatrix[i][j] *= distanceMatrix[i][j];
199 //link designMap to rows/columns in distance matrix
// Key: group name returned by designMap->getGroup; value: matrix indices of the samples in that group.
// NOTE(review): what getGroup returns for a sample missing from the design file is not visible here.
200 map<string, vector<int> > origGroupSampleMap;
201 for(int i=0;i<sampleNames.size();i++){
202 origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
204 int numGroups = origGroupSampleMap.size();
206 //create a new filename
208 string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "anosim";
209 m->openOutputFile(ANOSIMFileName, ANOSIMFile);
210 outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
// The same header is written to the screen/log and to the output file.
211 m->mothurOut("\ncomparison\tR-value\tP-value\n");
212 ANOSIMFile << "comparison\tR-value\tP-value\n";
// Test across all groups; the returned value is used as the overall P value.
215 double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha);
// Pairwise follow-up only when the overall test is significant and there is more than one pair.
218 if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){
// Number of unordered group pairs, used to scale alpha for the pairwise tests.
220 int numCombos = numGroups * (numGroups-1) / 2;
221 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
223 for(map<string, vector<int> >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
224 map<string, vector<int> >::iterator itB = itA;
// NOTE(review): the init clause `itB` is a no-op; presumably itB is advanced past
// itA before this loop so a group is never paired with itself - confirm.
226 for(itB;itB!=origGroupSampleMap.end();itB++){
// Two-group mapping for this comparison.
228 map<string, vector<int> > subGroupSampleMap;
230 subGroupSampleMap[itA->first] = itA->second; string groupA = itA->first;
231 subGroupSampleMap[itB->first] = itB->second; string groupB = itB->first;
// Collect the matrix indices of every sample in the two groups.
233 vector<int> subIndices;
234 for(map<string, vector<int> >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){
235 subIndices.insert(subIndices.end(), it->second.begin(), it->second.end());
237 int subNumSamples = subIndices.size();
// Ascending order ensures subIndices[i] >= subIndices[j] for j < i, i.e. lower-triangle access below.
239 sort(subIndices.begin(), subIndices.end());
// Same dimensions as the full matrix, pre-filled with -1; convertToRanks skips cells equal to -1.
241 vector<vector<double> > subDistMatrix(distanceMatrix.size());
242 for(int i=0;i<distanceMatrix.size();i++){
243 subDistMatrix[i].assign(distanceMatrix.size(), -1);
// Copy only the distances between samples that belong to the two groups.
246 for(int i=0;i<subNumSamples;i++){
247 for(int j=0;j<i;j++){
248 subDistMatrix[subIndices[i]][subIndices[j]] = distanceMatrix[subIndices[i]][subIndices[j]];
// The return value is ignored; the call prints and writes the result for this pair.
252 runANOSIM(ANOSIMFile, subDistMatrix, subGroupSampleMap, pairwiseAlpha);
// Report the error rates that were applied.
257 m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
258 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
261 m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
263 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
// List every output file produced by this run.
269 m->mothurOutEndLine();
270 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
271 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
272 m->mothurOutEndLine();
276 catch(exception& e) {
277 m->errorOut(e, "AnosimCommand", "execute");
281 //**********************************************************************************************************************
// Runs one ANOSIM test for the given distance matrix and group mapping.
// Computes R on the ranked distances, estimates a P value from `iters` random
// regroupings, and writes "groupA-groupB...<TAB>R<TAB>P" to both the log and
// ANOSIMFile. The caller in execute() uses the return value as the P value.
// NOTE(review): dMatrix and groupSampleMap are passed by value, so each call copies them.
283 double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector<vector<double> > dMatrix, map<string, vector<int> > groupSampleMap, double alpha) {
// Observed statistic, computed on ranks rather than raw distances.
287 vector<vector<double> > rankMatrix = convertToRanks(dMatrix);
288 double RValue = calcR(rankMatrix, groupSampleMap);
// Permutation test: the ranks stay fixed, only the group membership is shuffled.
291 for(int i=0;i<iters;i++){
292 map<string, vector<int> > randGroupSampleMap = getRandomizedGroups(groupSampleMap);
293 double RValueRand = calcR(rankMatrix, randGroupSampleMap);
// Count permutations whose R is at least as large as the observed R.
294 if(RValue <= RValueRand){ pCount++; }
297 double pValue = (double)pCount / (double) iters;
// A P value below 1/iters is reported as "<1/iters" instead of the raw value.
299 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
300 else { pString = toString(pValue); }
// Comparison label: the first group name, then '-' and each further group name.
303 map<string, vector<int> >::iterator it=groupSampleMap.begin();
304 m->mothurOut(it->first);
305 ANOSIMFile << it->first;
// NOTE(review): the init clause `it` is a no-op; presumably `it` is advanced once
// before this loop so the first name is not printed twice - confirm.
307 for(it;it!=groupSampleMap.end();it++){
308 m->mothurOut('-' + it->first);
309 ANOSIMFile << '-' << it->first;
312 m->mothurOut('\t' + toString(RValue) + '\t' + pString);
313 ANOSIMFile << '\t' << RValue << '\t' << pString;
320 m->mothurOutEndLine();
324 catch(exception& e) {
// NOTE(review): the tag "calcAnisom" does not match this method's name (runANOSIM).
325 m->errorOut(e, "AnosimCommand", "calcAnisom");
330 //**********************************************************************************************************************
// Computes the R statistic from a rank matrix and a group -> sample-index map:
//   R = (mean between-group rank - mean within-group rank) / (N*(N-1)/4)
// where N is the total number of samples across all groups. The rank matrix is
// always read with the larger index first (lower triangle).
// NOTE(review): both arguments are passed by value, and this is called once per
// permutation, so each call copies the whole rank matrix.
332 double AnosimCommand::calcR(vector<vector<double> > rankMatrix, map<string, vector<int> > groupSampleMap){
// N: total number of samples over all groups.
336 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
337 numSamples += it->second.size();
342 int numWithinComps = 0;
// Sum the ranks of every pair of samples that share a group.
344 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
345 vector<int> indices = it->second;
346 for(int i=0;i<indices.size();i++){
347 for(int j=0;j<i;j++){
// Order the two indices so the lookup stays below the diagonal.
348 if(indices[i] > indices[j]) { within += rankMatrix[indices[i]][indices[j]]; }
349 else { within += rankMatrix[indices[j]][indices[i]]; }
// Mean within-group rank.
// NOTE(review): if no within-group pairs were counted this divides by zero - confirm callers cannot hit that.
355 within /= (float) numWithinComps;
357 double between = 0.0;
358 int numBetweenComps = 0;
// NOTE(review): this itB is shadowed by the itB declared inside the loop below.
360 map<string, vector<int> >::iterator itB;
// Sum the ranks of pairs whose samples come from different groups.
362 for(map<string, vector<int> >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){
364 for(int i=0;i<itA->second.size();i++){
365 int A = itA->second[i];
366 map<string, vector<int> >::iterator itB = itA;
// NOTE(review): the init clause `itB` is a no-op; presumably itB is advanced past itA before this loop - confirm.
368 for(itB;itB!=groupSampleMap.end();itB++){
369 for(int j=0;j<itB->second.size();j++){
370 int B = itB->second[j];
// Larger index first, so the lookup stays below the diagonal.
371 if(A>B) { between += rankMatrix[A][B]; }
372 else { between += rankMatrix[B][A]; }
// Mean between-group rank.
381 between /= (float) numBetweenComps;
// Scale the difference of means by N*(N-1)/4.
383 double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0);
387 catch(exception& e) {
// NOTE(review): the tag "calcWithinBetween" does not match this method's name (calcR).
388 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
393 //**********************************************************************************************************************
// Replaces the lower-triangle distances with their ranks.
// Cells equal to -1 are skipped (execute() uses -1 to mark unused cells in
// pairwise sub-matrices). The remaining cells are sorted with
// compareSequenceDistance and ranked from 1; runs of equal distances receive
// the average of their ranks.
395 vector<vector<double> > AnosimCommand::convertToRanks(vector<vector<double> > dist) {
397 vector<seqDist> cells;
// Start from a copy of dist, so cells that are never ranked keep their original value.
398 vector<vector<double> > ranks = dist;
// Collect every usable cell below the diagonal as (row, column, distance).
400 for (int i = 0; i < dist.size(); i++) {
401 for (int j = 0; j < i; j++) {
402 if(dist[i][j] != -1){
403 seqDist member(i, j, dist[i][j]);
404 cells.push_back(member);
411 sort(cells.begin(), cells.end(), compareSequenceDistance);
413 //find ranks of distances
// NOTE(review): cells.size() is unsigned, so cells.size()-1 wraps to a huge value
// when cells is empty and the loop would index out of range - confirm an empty
// matrix cannot reach this point, or guard it.
416 for(int i=0;i<cells.size()-1;i++){
// Extend `index` across a run of cells whose distances are equal (a tie).
// NOTE(review): the bound that keeps index+1 inside cells is not visible in these lines.
420 while(dist[cells[index].seq1][cells[index].seq2] == dist[cells[index+1].seq1][cells[index+1].seq2]){
422 indexSum += index + 1;
// A cell that is not part of a tie gets its 1-based position in the sorted order.
426 ranks[cells[i].seq1][cells[i].seq2] = i+1;
// Tied cells i..index all receive the mean of their ranks.
429 double aveIndex = (double)indexSum / (double)(index - i + 1);
430 for(int j=i;j<=index;j++){
431 ranks[cells[j].seq1][cells[j].seq2] = aveIndex;
// Assigns the rank of the final sorted cell, which the loop above stops short of.
// NOTE(review): presumably this covers a last cell that is not tied with its predecessor - confirm.
437 if(indexSum == cells.size() - 1){
438 ranks[cells[cells.size()-1].seq1][cells[cells.size()-1].seq2] = indexSum + 1;
443 catch(exception& e) {
444 m->errorOut(e, "AnosimCommand", "convertToRanks");
449 //**********************************************************************************************************************
// Returns a mapping with the same group names and group sizes as origMapping,
// but with the sample indices randomly redistributed among the groups.
451 map<string, vector<int> > AnosimCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
453 vector<int> sampleIndices;
// NOTE(review): samplesPerGroup is filled below but not read in the lines shown.
454 vector<int> samplesPerGroup;
// Flatten every group's indices into one list.
456 map<string, vector<int> >::iterator it;
457 for(it=origMapping.begin();it!=origMapping.end();it++){
458 vector<int> indices = it->second;
459 samplesPerGroup.push_back(indices.size());
460 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17;
// a move to std::shuffle needs an explicit random engine, which changes how the
// permutation is seeded.
463 random_shuffle(sampleIndices.begin(), sampleIndices.end());
// Copy the original mapping to keep names and sizes, then overwrite each slot in
// map iteration order with the next shuffled index.
466 map<string, vector<int> > randomizedGroups = origMapping;
467 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
468 for(int i=0;i<it->second.size();i++){
469 it->second[i] = sampleIndices[index++];
473 return randomizedGroups;
475 catch (exception& e) {
// NOTE(review): the tag "randomizeGroups" does not match this method's name (getRandomizedGroups).
476 m->errorOut(e, "AnosimCommand", "randomizeGroups");
481 //**********************************************************************************************************************