5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
13 #include "sharedutilities.h"
16 //**********************************************************************************************************************
17 vector<string> AmovaCommand::setParameters(){
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pdesign);
20 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
21 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pphylip);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "AmovaCommand", "setParameters");
36 //**********************************************************************************************************************
37 string AmovaCommand::getHelpString(){
39 string helpString = "";
40 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
41 helpString += "The amova command outputs a .amova file.";
42 helpString += "The amova command parameters are phylip, iters, sets and alpha. The phylip and design parameters are required, unless you have valid current files.";
43 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
44 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
45 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
46 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000.";
47 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
48 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
52 m->errorOut(e, "AmovaCommand", "getHelpString");
56 //**********************************************************************************************************************
57 string AmovaCommand::getOutputPattern(string type) {
61 if (type == "amova") { pattern = "[filename],amova"; } //makes file like: amazon.align
62 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
67 m->errorOut(e, "AmovaCommand", "getOutputPattern");
71 //**********************************************************************************************************************
72 AmovaCommand::AmovaCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["amova"] = tempOutNames;
80 m->errorOut(e, "AmovaCommand", "AmovaCommand");
84 //**********************************************************************************************************************
85 AmovaCommand::AmovaCommand(string option) {
87 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
101 //check to make sure all parameters are valid for command
102 map<string,string>::iterator it;
103 for (it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["amova"] = tempOutNames;
111 //if the user changes the output directory command factory will send this info to us in the output parameter
112 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
114 //if the user changes the input directory command factory will send this info to us in the output parameter
115 string inputDir = validParameter.validFile(parameters, "inputdir", false);
116 if (inputDir == "not found"){ inputDir = ""; }
119 it = parameters.find("design");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["design"] = inputDir + it->second; }
127 it = parameters.find("phylip");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["phylip"] = inputDir + it->second; }
136 phylipFileName = validParameter.validFile(parameters, "phylip", true);
137 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
138 else if (phylipFileName == "not found") {
139 //if there is a current phylip file, use it
140 phylipFileName = m->getPhylipFile();
141 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
142 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
143 }else { m->setPhylipFile(phylipFileName); }
145 //check for required parameters
146 designFileName = validParameter.validFile(parameters, "design", true);
147 if (designFileName == "not open") { designFileName = ""; abort = true; }
148 else if (designFileName == "not found") {
149 //if there is a current design file, use it
150 designFileName = m->getDesignFile();
151 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
152 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
153 }else { m->setDesignFile(designFileName); }
155 string temp = validParameter.validFile(parameters, "iters", false);
156 if (temp == "not found") { temp = "1000"; }
157 m->mothurConvert(temp, iters);
159 temp = validParameter.validFile(parameters, "alpha", false);
160 if (temp == "not found") { temp = "0.05"; }
161 m->mothurConvert(temp, experimentwiseAlpha);
163 string sets = validParameter.validFile(parameters, "sets", false);
164 if (sets == "not found") { sets = ""; }
166 m->splitAtDash(sets, Sets);
170 catch(exception& e) {
171 m->errorOut(e, "AmovaCommand", "AmovaCommand");
175 //**********************************************************************************************************************
177 int AmovaCommand::execute(){
180 if (abort == true) { if (calledHelp) { return 0; } return 2; }
183 designMap = new GroupMap(designFileName);
184 designMap->readDesignMap();
186 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
188 //read in distance matrix and square it
189 ReadPhylipVector readMatrix(phylipFileName);
190 vector<string> sampleNames = readMatrix.read(distanceMatrix);
192 if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
194 vector<string> dGroups = designMap->getNamesOfGroups();
195 util.setGroups(Sets, dGroups);
197 for(int i=0;i<distanceMatrix.size();i++){
199 if (m->control_pressed) { delete designMap; return 0; }
201 string group = designMap->getGroup(sampleNames[i]);
203 if (group == "not found") {
204 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
205 }else if (!m->inUsersGroups(group, Sets)){ //not in set we want remove it
206 //remove from all other rows
207 for(int j=0;j<distanceMatrix.size();j++){
208 distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
210 distanceMatrix.erase(distanceMatrix.begin()+i);
211 sampleNames.erase(sampleNames.begin()+i);
217 for(int i=0;i<distanceMatrix.size();i++){
218 for(int j=0;j<i;j++){
219 distanceMatrix[i][j] *= distanceMatrix[i][j];
223 //link designMap to rows/columns in distance matrix
224 map<string, vector<int> > origGroupSampleMap;
225 for(int i=0;i<sampleNames.size();i++){
226 string group = designMap->getGroup(sampleNames[i]);
228 if (group == "not found") {
229 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
230 }else { origGroupSampleMap[group].push_back(i); }
233 int numGroups = origGroupSampleMap.size();
235 if (m->control_pressed) { delete designMap; return 0; }
237 //create a new filename
239 map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
240 string AMOVAFileName = getOutputFileName("amova", variables);
242 m->openOutputFile(AMOVAFileName, AMOVAFile);
243 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
245 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
246 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
248 int numCombos = numGroups * (numGroups-1) / 2;
249 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
251 map<string, vector<int> >::iterator itA;
252 map<string, vector<int> >::iterator itB;
254 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
256 for(itB;itB!=origGroupSampleMap.end();itB++){
258 map<string, vector<int> > pairwiseGroupSampleMap;
259 pairwiseGroupSampleMap[itA->first] = itA->second;
260 pairwiseGroupSampleMap[itB->first] = itB->second;
262 runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
265 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
266 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
269 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
271 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
276 m->mothurOutEndLine();
277 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
279 m->mothurOutEndLine();
283 catch(exception& e) {
284 m->errorOut(e, "AmovaCommand", "execute");
289 //**********************************************************************************************************************
291 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
293 map<string, vector<int> >::iterator it;
295 int numGroups = groupSampleMap.size();
296 int totalNumSamples = 0;
298 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
299 totalNumSamples += it->second.size();
302 double ssTotalOrig = calcSSTotal(groupSampleMap);
303 double ssWithinOrig = calcSSWithin(groupSampleMap);
304 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
307 for(int i=0;i<iters;i++){
308 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
309 double ssWithinRand = calcSSWithin(randomizedGroup);
310 if(ssWithinRand <= ssWithinOrig){ counter++; }
313 double pValue = (double)counter / (double) iters;
315 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
316 else { pString = toString(pValue); }
320 it = groupSampleMap.begin();
321 AMOVAFile << it->first;
322 m->mothurOut(it->first);
324 for(it;it!=groupSampleMap.end();it++){
325 AMOVAFile << '-' << it->first;
326 m->mothurOut('-' + it->first);
329 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
330 m->mothurOut("\tAmong\tWithin\tTotal\n");
332 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
333 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
335 int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong;
336 int dfWithin = totalNumSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin;
337 int dfTotal = totalNumSamples - 1; double Fs = MSAmong / MSWithin;
339 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
340 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
342 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
343 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
345 AMOVAFile << "Fs:\t" << Fs << endl;
346 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
348 AMOVAFile << "p-value: " << pString;
349 m->mothurOut("p-value: " + pString);
355 AMOVAFile << endl << endl;
356 m->mothurOutEndLine();m->mothurOutEndLine();
360 catch(exception& e) {
361 m->errorOut(e, "AmovaCommand", "runAMOVA");
366 //**********************************************************************************************************************
368 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
370 vector<int> sampleIndices;
371 vector<int> samplesPerGroup;
373 map<string, vector<int> >::iterator it;
374 for(it=origMapping.begin();it!=origMapping.end();it++){
375 vector<int> indices = it->second;
376 samplesPerGroup.push_back(indices.size());
377 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
380 random_shuffle(sampleIndices.begin(), sampleIndices.end());
383 map<string, vector<int> > randomizedGroups = origMapping;
384 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
385 for(int i=0;i<it->second.size();i++){
386 it->second[i] = sampleIndices[index++];
390 return randomizedGroups;
392 catch (exception& e) {
393 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
398 //**********************************************************************************************************************
400 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
404 map<string, vector<int> >::iterator it;
405 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
406 indices.insert(indices.end(), it->second.begin(), it->second.end());
408 sort(indices.begin(), indices.end());
410 int numIndices =indices.size();
411 double ssTotal = 0.0;
413 for(int i=1;i<numIndices;i++){
414 int row = indices[i];
416 for(int j=0;j<i;j++){
417 ssTotal += distanceMatrix[row][indices[j]];
420 ssTotal /= numIndices;
424 catch(exception& e) {
425 m->errorOut(e, "AmovaCommand", "calcSSTotal");
430 //**********************************************************************************************************************
432 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
435 double ssWithin = 0.0;
437 map<string, vector<int> >::iterator it;
438 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
440 double withinGroup = 0;
442 vector<int> samples = it->second;
444 for(int i=0;i<samples.size();i++){
445 int row = samples[i];
447 for(int j=0;j<samples.size();j++){
448 int col = samples[j];
451 withinGroup += distanceMatrix[row][col];
457 ssWithin += withinGroup / samples.size();
462 catch(exception& e) {
463 m->errorOut(e, "AmovaCommand", "calcSSWithin");
468 //**********************************************************************************************************************