5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
15 //**********************************************************************************************************************
// Registers the parameters of the amova command (design, phylip, iters, alpha, inputdir,
// outputdir) by appending one CommandParameter per option to `parameters`, then collects
// the name of every registered parameter into a vector<string>.
// NOTE(review): "1000" and "0.05" passed for iters and alpha match the fallbacks used by the
// option-parsing constructor; presumably that argument is the default value -- confirm
// against the CommandParameter declaration, which is not part of this file.
// NOTE(review): the embedded numbering skips lines in this function (e.g. 17, 27-29), so the
// try opener, the return statement and the catch header are not visible in this listing.
16 vector<string> AmovaCommand::setParameters(){
// Input-file parameters.
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
// Numeric parameters.
20 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
// Directory parameters.
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Gather the parameter names in the order they were registered above.
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error path: report the exception together with the class and method name.
30 m->errorOut(e, "AmovaCommand", "setParameters");
34 //**********************************************************************************************************************
// Builds the help text for the amova command by appending one sentence at a time to
// `helpString`.
// NOTE(review): none of the appended literals ends with a newline or a space, so as written
// consecutive sentences are concatenated directly ("...32-46.The amova command...").
// NOTE(review): the sentence listing the parameters names only phylip, iters and alpha,
// although the same sentence and the following ones describe design as required, and alpha
// is never described.
// NOTE(review): the return statement is not visible in this listing (embedded line 46 is absent).
35 string AmovaCommand::getHelpString(){
37 string helpString = "";
38 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
39 helpString += "The amova command outputs a .amova file.";
40 helpString += "The amova command parameters are phylip, iters, and alpha. The phylip and design parameters are required, unless you have valid current files.";
41 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
42 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
43 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000.";
44 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
45 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
// Error path: report the exception together with the class and method name.
49 m->errorOut(e, "AmovaCommand", "getHelpString");
53 //**********************************************************************************************************************
// Maps an output type name to the tag used when building output file names.
//   type      - output type to look up; only "amova" has a tag defined here.
//   inputName - not referenced in the visible lines of this function.
// An error is logged when `type` is not a key of outputTypes, and also when it has no tag
// definition below.
// NOTE(review): the declaration of `tag` and the return statement are not visible in this
// listing (embedded lines 56 and 66-67 are absent); presumably `tag` is what gets returned.
54 string AmovaCommand::getOutputFileNameTag(string type, string inputName=""){
57 map<string, vector<string> >::iterator it;
59 //is this a type this command creates
60 it = outputTypes.find(type);
61 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// Known type -> tag mapping; anything else is reported as undefined.
63 if (type == "amova") { tag = "amova"; }
64 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file.\n"); }
// Error path: report the exception together with the class and method name.
69 m->errorOut(e, "AmovaCommand", "getOutputFileNameTag");
73 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help called (both flags set to
// true) and registers "amova" in outputTypes with an empty list of file names.
// NOTE(review): embedded line 77 is absent from this listing, so one statement between the
// flag assignment and the outputTypes setup is not visible.
74 AmovaCommand::AmovaCommand(){
76 abort = true; calledHelp = true;
// Register the output type with no files yet.
78 vector<string> tempOutNames;
79 outputTypes["amova"] = tempOutNames;
// Error path: report the exception together with the class and method name.
82 m->errorOut(e, "AmovaCommand", "AmovaCommand");
86 //**********************************************************************************************************************
// Option-parsing constructor.
//   option - the raw option string; "help" and "citation" are handled specially, anything
//            else is parsed into name/value pairs.
// Sets `abort` whenever a parameter name is invalid, a given file cannot be opened, or a
// required file (phylip, design) is neither given nor available as a current file.
// On success it fills phylipFileName, designFileName, outputDir, iters and
// experimentwiseAlpha.
// NOTE(review): the embedded numbering skips several lines in this function (e.g. 88, 94-95,
// 119-120), so the try opener, the else branches that scope the parsing code and the
// declaration of `path` are not visible in this listing.
87 AmovaCommand::AmovaCommand(string option) {
89 abort = false; calledHelp = false;
91 //allow user to run help
92 if(option == "help") { help(); abort = true; calledHelp = true; }
93 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts.
96 vector<string> myArray = setParameters();
// Split the option string into name -> value pairs.
98 OptionParser parser(option);
99 map<string,string> parameters = parser.getParameters();
101 ValidParameters validParameter;
103 //check to make sure all parameters are valid for command
104 map<string,string>::iterator it;
105 for (it = parameters.begin(); it != parameters.end(); it++) {
106 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
109 //initialize outputTypes
110 vector<string> tempOutNames;
111 outputTypes["amova"] = tempOutNames;
113 //if the user changes the output directory command factory will send this info to us in the output parameter
114 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
116 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
117 string inputDir = validParameter.validFile(parameters, "inputdir", false);
118 if (inputDir == "not found"){ inputDir = ""; }
121 it = parameters.find("design");
122 //user has given a design file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["design"] = inputDir + it->second; }
129 it = parameters.find("phylip");
130 //user has given a phylip file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["phylip"] = inputDir + it->second; }
//phylip is required: validFile reports problems through the strings "not open" and "not found"
138 phylipFileName = validParameter.validFile(parameters, "phylip", true);
139 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
140 else if (phylipFileName == "not found") {
141 //if there is a current phylip file, use it
142 phylipFileName = m->getPhylipFile();
143 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
144 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
145 }else { m->setPhylipFile(phylipFileName); }
147 //design is required as well; same handling as for phylip above
148 designFileName = validParameter.validFile(parameters, "design", true);
149 if (designFileName == "not open") { designFileName = ""; abort = true; }
150 else if (designFileName == "not found") {
151 //if there is a current design file, use it
152 designFileName = m->getDesignFile();
153 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
154 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
155 }else { m->setDesignFile(designFileName); }
//number of randomizations; falls back to 1000 when not given
157 string temp = validParameter.validFile(parameters, "iters", false);
158 if (temp == "not found") { temp = "1000"; }
159 m->mothurConvert(temp, iters);
//experiment-wise alpha; falls back to 0.05 when not given
161 temp = validParameter.validFile(parameters, "alpha", false);
162 if (temp == "not found") { temp = "0.05"; }
163 m->mothurConvert(temp, experimentwiseAlpha);
166 catch(exception& e) {
167 m->errorOut(e, "AmovaCommand", "AmovaCommand");
171 //**********************************************************************************************************************
// Runs the amova command: reads the design file and the distance matrix, runs the test on
// all groups, and -- when that result is significant and there are more than two groups --
// repeats it for every pair of groups with a corrected alpha. Results go to the .amova
// output file and to the mothur log.
// Returns 0 when help was requested or when control_pressed is set; returns 2 when the
// command was aborted for another reason.
// NOTE(review): the embedded numbering skips lines in this function (e.g. 209, 224, 242-243,
// 250), so the declaration of `AMOVAFile`, the initialisation of `itB`, the cleanup after
// the run and the final return are not visible in this listing.
173 int AmovaCommand::execute(){
176 if (abort == true) { if (calledHelp) { return 0; } return 2; }
//read the design file (sample -> group assignments)
179 designMap = new GroupMap(designFileName);
180 designMap->readDesignMap();
//default the output directory to the location of the phylip file
182 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
184 //read in distance matrix and square it
185 ReadPhylipVector readMatrix(phylipFileName);
186 vector<string> sampleNames = readMatrix.read(distanceMatrix);
//only the entries with j < i (below the diagonal) are squared
188 for(int i=0;i<distanceMatrix.size();i++){
189 for(int j=0;j<i;j++){
190 distanceMatrix[i][j] *= distanceMatrix[i][j];
194 //link designMap to rows/columns in distance matrix
195 map<string, vector<int> > origGroupSampleMap;
196 for(int i=0;i<sampleNames.size();i++){
197 string group = designMap->getGroup(sampleNames[i]);
//a sample missing from the design file is reported and flags the run to stop
199 if (group == "not found") {
200 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
201 }else { origGroupSampleMap[group].push_back(i); }
204 int numGroups = origGroupSampleMap.size();
206 if (m->control_pressed) { delete designMap; return 0; }
208 //create a new filename
210 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("amova");
211 m->openOutputFile(AMOVAFileName, AMOVAFile);
212 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
//test across all groups first
214 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
215 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
//number of distinct group pairs; alpha is divided by it for the pairwise tests
217 int numCombos = numGroups * (numGroups-1) / 2;
218 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
220 map<string, vector<int> >::iterator itA;
221 map<string, vector<int> >::iterator itB;
223 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
//NOTE(review): the init clause `itB` below has no effect; the statement that positions itB is not visible here
225 for(itB;itB!=origGroupSampleMap.end();itB++){
//two-group map holding just the current pair
227 map<string, vector<int> > pairwiseGroupSampleMap;
228 pairwiseGroupSampleMap[itA->first] = itA->second;
229 pairwiseGroupSampleMap[itB->first] = itB->second;
//the p-value returned for the pair is not used here; runAMOVA prints the table itself
231 runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
234 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
235 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
238 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
240 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
//list every output file created by this run
245 m->mothurOutEndLine();
246 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
247 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
248 m->mothurOutEndLine();
252 catch(exception& e) {
253 m->errorOut(e, "AmovaCommand", "execute");
258 //**********************************************************************************************************************
// Runs one AMOVA test on the given groups and prints the resulting table to both AMOVAFile
// and the mothur log.
//   AMOVAFile      - output stream the table is written to.
//   groupSampleMap - group name -> sample indices; passed by value, so the caller's map is copied.
//   alpha          - not referenced in the visible lines of this function.
// The p-value is the fraction of `iters` randomizations whose within-group sum of squares is
// smaller than the one computed for the original grouping.
// NOTE(review): the embedded numbering skips lines in this function (e.g. 275, 283, 292,
// 320-322, 327), so the declarations of `counter` and `pString`, some statements around the
// p-value output and the return statement are not visible in this listing.
// NOTE(review): iters == 0 makes the p-value a division by zero; dfAmong is 0 for a single
// group and dfWithin is 0 when every group holds exactly one sample, which makes the mean
// squares divide by zero.
260 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
262 map<string, vector<int> >::iterator it;
264 int numGroups = groupSampleMap.size();
265 int totalNumSamples = 0;
//count the samples over all groups
267 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
268 totalNumSamples += it->second.size();
//sums of squares for the original grouping; among = total - within
271 double ssTotalOrig = calcSSTotal(groupSampleMap);
272 double ssWithinOrig = calcSSWithin(groupSampleMap);
273 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
//randomization test: count the shuffles with a strictly smaller within-group sum of squares
276 for(int i=0;i<iters;i++){
277 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
278 double ssWithinRand = calcSSWithin(randomizedGroup);
279 if(ssWithinRand < ssWithinOrig){ counter++; }
282 double pValue = (double)counter / (double) iters;
//when no shuffle beat the original, report "<1/iters" instead of 0
284 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
285 else { pString = toString(pValue); }
//header: group names joined with '-'
289 it = groupSampleMap.begin();
290 AMOVAFile << it->first;
291 m->mothurOut(it->first);
//NOTE(review): the init clause `it` has no effect; the step that moves past the first group is not visible here
293 for(it;it!=groupSampleMap.end();it++){
294 AMOVAFile << '-' << it->first;
295 m->mothurOut('-' + it->first);
298 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
299 m->mothurOut("\tAmong\tWithin\tTotal\n");
301 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
302 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
//degrees of freedom, mean squares (SS / df) and Fs = MSAmong / MSWithin
304 int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong;
305 int dfWithin = totalNumSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin;
306 int dfTotal = totalNumSamples - 1; double Fs = MSAmong / MSWithin;
308 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
309 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
311 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
312 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
314 AMOVAFile << "Fs:\t" << Fs << endl;
315 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
317 AMOVAFile << "p-value: " << pString;
318 m->mothurOut("p-value: " + pString);
324 AMOVAFile << endl << endl;
325 m->mothurOutEndLine();m->mothurOutEndLine();
329 catch(exception& e) {
330 m->errorOut(e, "AmovaCommand", "runAMOVA");
335 //**********************************************************************************************************************
// Returns a copy of origMapping in which the sample indices have been shuffled across
// groups: every group keeps its name and its number of samples, but is refilled from a
// randomly permuted list of all indices.
//   origMapping - group name -> sample indices; passed by value, so the caller's map is not modified.
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17, so this
// only builds with an older language standard; its source of randomness is implementation-defined.
// NOTE(review): `samplesPerGroup` is filled but never read in the visible lines.
// NOTE(review): the declaration of `index` is not visible in this listing (embedded line 351
// is absent).
337 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
339 vector<int> sampleIndices;
340 vector<int> samplesPerGroup;
//flatten the indices of all groups into one list (groups are visited in map key order)
342 map<string, vector<int> >::iterator it;
343 for(it=origMapping.begin();it!=origMapping.end();it++){
344 vector<int> indices = it->second;
345 samplesPerGroup.push_back(indices.size());
346 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
349 random_shuffle(sampleIndices.begin(), sampleIndices.end());
//start from a copy so group names and sizes are preserved, then overwrite the indices in order
352 map<string, vector<int> > randomizedGroups = origMapping;
353 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
354 for(int i=0;i<it->second.size();i++){
355 it->second[i] = sampleIndices[index++];
359 return randomizedGroups;
361 catch (exception& e) {
362 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
367 //**********************************************************************************************************************
// Computes the total sum of squares for the samples in groupSampleMap: the sum of
// distanceMatrix entries over every unordered pair of samples, divided by the number of samples.
//   groupSampleMap - group name -> sample indices; group membership is ignored here, only
//                    the union of the indices is used.
// NOTE(review): an empty map gives numIndices == 0 and therefore 0.0 / 0.
// NOTE(review): the declaration of `indices` and the return statement are not visible in
// this listing (embedded lines 372 and 391 are absent).
369 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
//pool the sample indices of all groups
373 map<string, vector<int> >::iterator it;
374 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
375 indices.insert(indices.end(), it->second.begin(), it->second.end());
//ascending order makes indices[j] <= indices[i] for j < i, so the lookups below use
//[row][col] with col <= row -- presumably to stay in the part of the matrix squared by execute()
377 sort(indices.begin(), indices.end());
379 int numIndices =indices.size();
380 double ssTotal = 0.0;
//visit every pair (i, j) with j < i exactly once
382 for(int i=1;i<numIndices;i++){
383 int row = indices[i];
385 for(int j=0;j<i;j++){
386 ssTotal += distanceMatrix[row][indices[j]];
389 ssTotal /= numIndices;
393 catch(exception& e) {
394 m->errorOut(e, "AmovaCommand", "calcSSTotal");
399 //**********************************************************************************************************************
// Computes the within-group sum of squares: for every group, distanceMatrix entries between
// that group's samples are summed and divided by the group size, and the per-group values
// are added up.
//   groupSampleMap - group name -> sample indices.
// NOTE(review): embedded lines 418-419 are absent from this listing, so any condition that
// restricts which (row, col) pairs are added is not visible; as shown, every ordered pair
// including row == col would be summed.
// NOTE(review): a group with no samples would divide by zero below.
// NOTE(review): the return statement is not visible in this listing (embedded line 429 is absent).
401 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
404 double ssWithin = 0.0;
406 map<string, vector<int> >::iterator it;
407 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
//running sum for the current group
409 double withinGroup = 0;
//copy of this group's sample indices
411 vector<int> samples = it->second;
413 for(int i=0;i<samples.size();i++){
414 int row = samples[i];
416 for(int j=0;j<samples.size();j++){
417 int col = samples[j];
420 withinGroup += distanceMatrix[row][col];
//normalise by the number of samples in the group
426 ssWithin += withinGroup / samples.size();
431 catch(exception& e) {
432 m->errorOut(e, "AmovaCommand", "calcSSWithin");
437 //**********************************************************************************************************************