5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
14 //**********************************************************************************************************************
// Lists the parameter names accepted by the amova command (declared return type vector<string>).
// NOTE(review): the embedded listing numbers jump (15 -> 17, 18 -> 22), so the lines in
// between are not shown here -- presumably the try/return/catch scaffolding; confirm in the full file.
15 vector<string> AmovaCommand::getValidParameters(){
// Accepted names: outputdir, iters, phylip, design, alpha and inputdir.
17 string Array[] = {"outputdir","iters","phylip","design","alpha", "inputdir"};
// Copy the array into a vector; sizeof(Array)/sizeof(string) yields the element count.
18 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Hand the exception `e` to m->errorOut, tagged with the class and method name.
22 m->errorOut(e, "AmovaCommand", "getValidParameters");
26 //**********************************************************************************************************************
// Default constructor: flags the command as aborted with help requested and registers
// an empty file list under the "amova" output type.
27 AmovaCommand::AmovaCommand(){
// Both flags start as true here (the AmovaCommand(string) constructor starts them as false).
29 abort = true; calledHelp = true;
30 vector<string> tempOutNames;
// Seed the "amova" entry of outputTypes with an empty vector.
31 outputTypes["amova"] = tempOutNames;
// Hand the exception `e` to m->errorOut, tagged with the class and method name.
34 m->errorOut(e, "AmovaCommand", "AmovaCommand");
38 //**********************************************************************************************************************
// Lists the parameter names that must be supplied (declared return type vector<string>).
// NOTE(review): the return statement is not among the lines shown (embedded numbers skip 43-45).
39 vector<string> AmovaCommand::getRequiredParameters(){
// "design" is the only entry in the required list.
41 string Array[] = {"design"};
// Copy the array into a vector; sizeof(Array)/sizeof(string) yields the element count.
42 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Hand the exception `e` to m->errorOut, tagged with the class and method name.
46 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
50 //**********************************************************************************************************************
// Lists the files the command requires (declared return type vector<string>).
// NOTE(review): `Array` is not declared in the lines visible here; the embedded numbering
// skips 52-53, so its declaration is presumably on an elided line -- confirm in the full file.
51 vector<string> AmovaCommand::getRequiredFiles(){
// Build the vector from `Array`; sizeof(Array)/sizeof(string) yields the element count.
54 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Hand the exception `e` to m->errorOut, tagged with the class and method name.
58 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
62 //**********************************************************************************************************************
// Constructs the command from an option string: validates the parameter names, resolves the
// phylip and design file names (prefixing inputdir when no path is given) and reads the
// iters and alpha settings. Sets `abort` when validation or file lookup fails.
// NOTE(review): several short lines are elided from this listing (e.g. the declaration of
// `path` and the bodies of the "not found" branches); confirm details against the full file.
64 AmovaCommand::AmovaCommand(string option) {
66 abort = false; calledHelp = false;
68 //allow user to run help
69 if(option == "help") { help(); abort = true; calledHelp = true; }
72 //valid parameters for this command
73 string AlignArray[] = {"design","outputdir","iters","phylip","alpha", "inputdir"};
74 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
// Parse the option string into a name -> value map.
76 OptionParser parser(option);
77 map<string,string> parameters = parser.getParameters();
79 ValidParameters validParameter;
81 //check to make sure all parameters are valid for command
82 map<string,string>::iterator it;
83 for (it = parameters.begin(); it != parameters.end(); it++) {
// Any parameter rejected by isValidParameter aborts the command.
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //initialize outputTypes
88 vector<string> tempOutNames;
89 outputTypes["amova"] = tempOutNames;
91 //if the user changes the output directory command factory will send this info to us in the outputdir parameter
// A "not found" result is replaced by an empty string.
92 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
94 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
95 string inputDir = validParameter.validFile(parameters, "inputdir", false);
96 if (inputDir == "not found"){ inputDir = ""; }
99 it = parameters.find("design");
100 //user has given a design file
101 if(it != parameters.end()){
102 path = m->hasPath(it->second);
103 //if the user has not given a path then, add inputdir. else leave path alone.
104 if (path == "") { parameters["design"] = inputDir + it->second; }
107 it = parameters.find("phylip");
108 //user has given a phylip file
109 if(it != parameters.end()){
110 path = m->hasPath(it->second);
111 //if the user has not given a path then, add inputdir. else leave path alone.
112 if (path == "") { parameters["phylip"] = inputDir + it->second; }
// Resolve the phylip file: "not open" clears the name and aborts.
116 phylipFileName = validParameter.validFile(parameters, "phylip", true);
117 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
// NOTE(review): the statements executed for "not found" are not shown in this listing.
118 else if (phylipFileName == "not found") {
121 //check currentFiles for a phylip file
122 //if (currentFiles->getPhylipFile() != "") { phylipFileName = currentFiles->getPhylipFile(); m->mothurOut("Using " + phylipFileName + " as phylip file."); m->mothurOutEndLine();
123 //}else { m->mothurOut("You must provide an phylip file."); m->mothurOutEndLine(); abort = true; }
126 //check for required parameters
127 designFileName = validParameter.validFile(parameters, "design", true);
// "not open" aborts the command.
128 if (designFileName == "not open") { abort = true; }
// NOTE(review): the statements executed for "not found" are not shown in this listing.
129 else if (designFileName == "not found") {
132 //check currentFiles for a design file
133 //if (currentFiles->getDesignFile() != "") { designFileName = currentFiles->getDesignFile(); m->mothurOut("Using " + designFileName + " as design file."); m->mothurOutEndLine();
134 //}else { m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true; }
// iters falls back to "1000" when not supplied; convert() stores it in `iters`.
137 string temp = validParameter.validFile(parameters, "iters", false);
138 if (temp == "not found") { temp = "1000"; }
139 convert(temp, iters);
// alpha falls back to "0.05" when not supplied; convert() stores it in `experimentwiseAlpha`.
141 temp = validParameter.validFile(parameters, "alpha", false);
142 if (temp == "not found") { temp = "0.05"; }
143 convert(temp, experimentwiseAlpha);
146 catch(exception& e) {
// Report the exception, tagged with the class and method name.
147 m->errorOut(e, "AmovaCommand", "AmovaCommand");
152 //**********************************************************************************************************************
// Prints the usage text for the amova command through m->mothurOut.
// The parameter summary now names `design` alongside phylip, iters and alpha: design is an
// accepted parameter and the same sentence already describes it as required.
// NOTE(review): this block keeps the listing's embedded line numbers; the try/closing-brace
// lines (155, 164, 167-169) are not shown in this view.
154 void AmovaCommand::help(){
156 m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n");
157 m->mothurOut("The amova command outputs a .amova file. \n");
158 m->mothurOut("The amova command parameters are phylip, design, iters, and alpha. The phylip and design parameters are required.\n");
159 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running amova. It is required. \n");
160 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
161 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
162 m->mothurOut("The amova command should be in the following format: amova(phylip=file.dist, design=file.design).\n");
163 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
165 catch(exception& e) {
// Report the exception, tagged with the class and method name.
166 m->errorOut(e, "AmovaCommand", "help");
171 //**********************************************************************************************************************
// Destructor; the body is empty.
173 AmovaCommand::~AmovaCommand(){}
175 //**********************************************************************************************************************
// Runs the amova analysis: loads the design map and the distance matrix, runs AMOVA over all
// groups and, when that result is significant and there are more than two groups, over every
// pair of groups; finally lists the output file names.
// NOTE(review): short lines are elided from this listing (e.g. the declaration of AMOVAFile,
// the initialisation of itB, any cleanup and the final return); confirm against the full file.
177 int AmovaCommand::execute(){
// Skip the run when a constructor set abort: returns 0 if help was requested, otherwise 2.
180 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Load the sample -> group assignments from the design file.
// NOTE(review): designMap is allocated with new; no matching delete appears in the lines shown.
183 designMap = new GroupMap(designFileName);
184 designMap->readDesignMap();
// Default the output directory to the path portion of the phylip file name.
186 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
188 //read in distance matrix and square it
189 ReadPhylipVector readMatrix(phylipFileName);
190 vector<string> sampleNames = readMatrix.read(distanceMatrix);
// Only entries with column index j < row index i are squared, in place.
192 for(int i=0;i<distanceMatrix.size();i++){
193 for(int j=0;j<i;j++){
194 distanceMatrix[i][j] *= distanceMatrix[i][j];
198 //link designMap to rows/columns in distance matrix
// Maps each group name to the matrix row indices of its samples.
// NOTE(review): no check is visible for samples absent from the design file; what getGroup
// returns in that case is not shown here.
199 map<string, vector<int> > origGroupSampleMap;
200 for(int i=0;i<sampleNames.size();i++){
201 origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
203 int numGroups = origGroupSampleMap.size();
205 //create a new filename
// Output name: outputDir + root of the phylip file's simple name + "amova".
207 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "amova";
208 m->openOutputFile(AMOVAFileName, AMOVAFile);
209 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
// Test all groups together at the experiment-wise alpha.
211 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
// Pairwise follow-up only when the overall p-value is <= alpha and there are more than two groups.
212 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
// Number of unordered group pairs; alpha is divided by it for the pairwise tests.
214 int numCombos = numGroups * (numGroups-1) / 2;
215 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
217 map<string, vector<int> >::iterator itA;
218 map<string, vector<int> >::iterator itB;
220 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
// NOTE(review): itB has no initialiser in this for-statement; embedded line 221 is elided and
// presumably positions itB just after itA -- confirm.
222 for(itB;itB!=origGroupSampleMap.end();itB++){
// Two-group map holding only the current pair.
224 map<string, vector<int> > pairwiseGroupSampleMap;
225 pairwiseGroupSampleMap[itA->first] = itA->second;
226 pairwiseGroupSampleMap[itB->first] = itB->second;
228 runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
231 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
232 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
235 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
237 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
// List every file recorded in outputNames.
242 m->mothurOutEndLine();
243 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
244 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
245 m->mothurOutEndLine();
249 catch(exception& e) {
// Report the exception, tagged with the class and method name.
250 m->errorOut(e, "AmovaCommand", "execute");
255 //**********************************************************************************************************************
// Runs one AMOVA test over the groups in groupSampleMap and writes the SS / df / MS / Fs /
// p-value table both to AMOVAFile and through m->mothurOut.
//   AMOVAFile      - output stream receiving the table
//   groupSampleMap - group name -> distance-matrix indices of its samples (passed by value)
//   alpha          - significance threshold; its use is not visible in the lines shown
// Declared return type is double; execute() compares the result against its alpha.
// NOTE(review): short lines are elided from this listing (declarations of `counter` and
// `pString`, embedded lines 289 and 316-320, and the return statement); confirm in the full file.
257 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
259 map<string, vector<int> >::iterator it;
261 int numGroups = groupSampleMap.size();
262 int totalNumSamples = 0;
// Total sample count across all groups.
264 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
265 totalNumSamples += it->second.size();
// Observed sums of squares; the among-group part is total minus within.
268 double ssTotalOrig = calcSSTotal(groupSampleMap);
269 double ssWithinOrig = calcSSWithin(groupSampleMap);
270 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
// Randomization test: count the iterations whose randomized within-group SS is strictly
// smaller than the observed one.
// NOTE(review): ties (ssWithinRand == ssWithinOrig) are not counted -- confirm this matches
// the intended definition of the permutation p-value.
273 for(int i=0;i<iters;i++){
274 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
275 double ssWithinRand = calcSSWithin(randomizedGroup);
276 if(ssWithinRand < ssWithinOrig){ counter++; }
// NOTE(review): divides by iters; no guard against iters == 0 is visible here.
279 double pValue = (double)counter / (double) iters;
// When the p-value is below 1/iters it is reported as "<" followed by 1/iters.
281 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
282 else { pString = toString(pValue); }
// Header: group names joined with '-'.
// NOTE(review): the loop below has no initialiser for `it`; embedded line 289 is elided and
// presumably advances `it` past the first group -- confirm.
286 it = groupSampleMap.begin();
287 AMOVAFile << it->first;
288 m->mothurOut(it->first);
290 for(it;it!=groupSampleMap.end();it++){
291 AMOVAFile << '-' << it->first;
292 m->mothurOut('-' + it->first);
295 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
296 m->mothurOut("\tAmong\tWithin\tTotal\n");
298 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
299 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
// Degrees of freedom and mean squares (SS / df); Fs is the ratio MSAmong / MSWithin.
// NOTE(review): dfAmong is 0 for a single group and dfWithin is 0 when every group holds
// exactly one sample; either divisor being 0 is not guarded against in the lines shown.
301 int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong;
302 int dfWithin = totalNumSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin;
303 int dfTotal = totalNumSamples - 1; double Fs = MSAmong / MSWithin;
305 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
306 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
308 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
309 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
311 AMOVAFile << "Fs:\t" << Fs << endl;
312 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
314 AMOVAFile << "p-value: " << pString;
315 m->mothurOut("p-value: " + pString);
// Blank lines terminate this table in both outputs.
321 AMOVAFile << endl << endl;
322 m->mothurOutEndLine();m->mothurOutEndLine();
326 catch(exception& e) {
// Report the exception, tagged with the class and method name.
327 m->errorOut(e, "AmovaCommand", "runAMOVA");
332 //**********************************************************************************************************************
// Returns a copy of origMapping with the same group names and group sizes, but with the
// sample indices redistributed across the groups after a random shuffle.
//   origMapping - group name -> sample indices (passed by value)
// NOTE(review): `index` is not declared in the lines shown (embedded line 348 is elided,
// presumably initialising it to 0) -- confirm in the full file.
334 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
336 vector<int> sampleIndices;
// Filled below but not read again in the lines visible here.
337 vector<int> samplesPerGroup;
339 map<string, vector<int> >::iterator it;
// Flatten the indices of every group into one list, in map iteration order.
340 for(it=origMapping.begin();it!=origMapping.end();it++){
341 vector<int> indices = it->second;
342 samplesPerGroup.push_back(indices.size());
343 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
// Shuffle the pooled indices.
346 random_shuffle(sampleIndices.begin(), sampleIndices.end());
// Start from a copy so names and per-group sizes are preserved, then overwrite each slot
// with the next shuffled index.
349 map<string, vector<int> > randomizedGroups = origMapping;
350 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
351 for(int i=0;i<it->second.size();i++){
352 it->second[i] = sampleIndices[index++];
356 return randomizedGroups;
358 catch (exception& e) {
// Report the exception, tagged with the class and method name.
359 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
364 //**********************************************************************************************************************
// Computes the total sum of squares over all samples in groupSampleMap: the sum of
// distanceMatrix entries over every pair of pooled samples, divided by the sample count.
//   groupSampleMap - group name -> distance-matrix indices of its samples
// NOTE(review): the declaration of `indices` and the return statement are elided from this
// listing (embedded lines 367-369 and 387-389) -- confirm in the full file.
366 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
370 map<string, vector<int> >::iterator it;
// Pool the indices of every group.
371 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
372 indices.insert(indices.end(), it->second.begin(), it->second.end());
// Ascending order guarantees indices[j] <= indices[i] for j < i, so each lookup below uses
// a column index no greater than the row index.
374 sort(indices.begin(), indices.end());
376 int numIndices =indices.size();
377 double ssTotal = 0.0;
// Visit each pair (i, j) with j < i exactly once.
379 for(int i=1;i<numIndices;i++){
380 int row = indices[i];
382 for(int j=0;j<i;j++){
383 ssTotal += distanceMatrix[row][indices[j]];
// Divide the pair sum by the number of pooled samples.
386 ssTotal /= numIndices;
390 catch(exception& e) {
// Report the exception, tagged with the class and method name.
391 m->errorOut(e, "AmovaCommand", "calcSSTotal");
396 //**********************************************************************************************************************
// Computes the within-group sum of squares: for each group, sums distanceMatrix entries
// over pairs of its samples, divides by the group size, and accumulates across groups.
//   groupSampleMap - group name -> distance-matrix indices of its samples
// NOTE(review): embedded lines 415-416 are elided from this listing, so a guard on the
// (row, col) pair (e.g. restricting to one triangle of the matrix) may not be visible here;
// the return statement is elided as well -- confirm in the full file.
398 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
401 double ssWithin = 0.0;
403 map<string, vector<int> >::iterator it;
404 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
406 double withinGroup = 0;
// Local copy of this group's sample indices.
408 vector<int> samples = it->second;
410 for(int i=0;i<samples.size();i++){
411 int row = samples[i];
413 for(int j=0;j<samples.size();j++){
414 int col = samples[j];
417 withinGroup += distanceMatrix[row][col];
// Each group contributes its pair sum divided by its own size.
423 ssWithin += withinGroup / samples.size();
428 catch(exception& e) {
// Report the exception, tagged with the class and method name.
429 m->errorOut(e, "AmovaCommand", "calcSSWithin");
434 //**********************************************************************************************************************