5 * Created by westcott on 2/7/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
13 #include "sharedutilities.h"
16 //**********************************************************************************************************************
17 vector<string> AmovaCommand::setParameters(){
19 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
21 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
22 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "AmovaCommand", "setParameters");
36 //**********************************************************************************************************************
37 string AmovaCommand::getHelpString(){
39 string helpString = "";
40 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
41 helpString += "The amova command outputs a .amova file.";
42 helpString += "The amova command parameters are phylip, iters, sets and alpha. The phylip and design parameters are required, unless you have valid current files.";
43 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
44 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
45 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
46 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000.";
47 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
48 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
52 m->errorOut(e, "AmovaCommand", "getHelpString");
56 //**********************************************************************************************************************
57 string AmovaCommand::getOutputFileNameTag(string type, string inputName=""){
60 map<string, vector<string> >::iterator it;
62 //is this a type this command creates
63 it = outputTypes.find(type);
64 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66 if (type == "amova") { tag = "amova"; }
67 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file.\n"); }
72 m->errorOut(e, "AmovaCommand", "getOutputFileNameTag");
76 //**********************************************************************************************************************
77 AmovaCommand::AmovaCommand(){
79 abort = true; calledHelp = true;
81 vector<string> tempOutNames;
82 outputTypes["amova"] = tempOutNames;
85 m->errorOut(e, "AmovaCommand", "AmovaCommand");
89 //**********************************************************************************************************************
90 AmovaCommand::AmovaCommand(string option) {
92 abort = false; calledHelp = false;
94 //allow user to run help
95 if(option == "help") { help(); abort = true; calledHelp = true; }
96 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99 vector<string> myArray = setParameters();
101 OptionParser parser(option);
102 map<string,string> parameters = parser.getParameters();
104 ValidParameters validParameter;
106 //check to make sure all parameters are valid for command
107 map<string,string>::iterator it;
108 for (it = parameters.begin(); it != parameters.end(); it++) {
109 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
112 //initialize outputTypes
113 vector<string> tempOutNames;
114 outputTypes["amova"] = tempOutNames;
116 //if the user changes the output directory command factory will send this info to us in the output parameter
117 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
119 //if the user changes the input directory command factory will send this info to us in the output parameter
120 string inputDir = validParameter.validFile(parameters, "inputdir", false);
121 if (inputDir == "not found"){ inputDir = ""; }
124 it = parameters.find("design");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["design"] = inputDir + it->second; }
132 it = parameters.find("phylip");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["phylip"] = inputDir + it->second; }
141 phylipFileName = validParameter.validFile(parameters, "phylip", true);
142 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
143 else if (phylipFileName == "not found") {
144 //if there is a current phylip file, use it
145 phylipFileName = m->getPhylipFile();
146 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
148 }else { m->setPhylipFile(phylipFileName); }
150 //check for required parameters
151 designFileName = validParameter.validFile(parameters, "design", true);
152 if (designFileName == "not open") { designFileName = ""; abort = true; }
153 else if (designFileName == "not found") {
154 //if there is a current design file, use it
155 designFileName = m->getDesignFile();
156 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
157 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
158 }else { m->setDesignFile(designFileName); }
160 string temp = validParameter.validFile(parameters, "iters", false);
161 if (temp == "not found") { temp = "1000"; }
162 m->mothurConvert(temp, iters);
164 temp = validParameter.validFile(parameters, "alpha", false);
165 if (temp == "not found") { temp = "0.05"; }
166 m->mothurConvert(temp, experimentwiseAlpha);
168 string sets = validParameter.validFile(parameters, "sets", false);
169 if (sets == "not found") { sets = ""; }
171 m->splitAtDash(sets, Sets);
175 catch(exception& e) {
176 m->errorOut(e, "AmovaCommand", "AmovaCommand");
180 //**********************************************************************************************************************
182 int AmovaCommand::execute(){
185 if (abort == true) { if (calledHelp) { return 0; } return 2; }
188 designMap = new GroupMap(designFileName);
189 designMap->readDesignMap();
191 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
193 //read in distance matrix and square it
194 ReadPhylipVector readMatrix(phylipFileName);
195 vector<string> sampleNames = readMatrix.read(distanceMatrix);
197 if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
199 vector<string> dGroups = designMap->getNamesOfGroups();
200 util.setGroups(Sets, dGroups);
202 for(int i=0;i<distanceMatrix.size();i++){
204 if (m->control_pressed) { delete designMap; return 0; }
206 string group = designMap->getGroup(sampleNames[i]);
208 if (group == "not found") {
209 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
210 }else if (!m->inUsersGroups(group, Sets)){ //not in set we want remove it
211 //remove from all other rows
212 for(int j=0;j<distanceMatrix.size();j++){
213 distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
215 distanceMatrix.erase(distanceMatrix.begin()+i);
216 sampleNames.erase(sampleNames.begin()+i);
222 for(int i=0;i<distanceMatrix.size();i++){
223 for(int j=0;j<i;j++){
224 distanceMatrix[i][j] *= distanceMatrix[i][j];
228 //link designMap to rows/columns in distance matrix
229 map<string, vector<int> > origGroupSampleMap;
230 for(int i=0;i<sampleNames.size();i++){
231 string group = designMap->getGroup(sampleNames[i]);
233 if (group == "not found") {
234 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
235 }else { origGroupSampleMap[group].push_back(i); }
238 int numGroups = origGroupSampleMap.size();
240 if (m->control_pressed) { delete designMap; return 0; }
242 //create a new filename
244 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("amova");
245 m->openOutputFile(AMOVAFileName, AMOVAFile);
246 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
248 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
249 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
251 int numCombos = numGroups * (numGroups-1) / 2;
252 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
254 map<string, vector<int> >::iterator itA;
255 map<string, vector<int> >::iterator itB;
257 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
259 for(itB;itB!=origGroupSampleMap.end();itB++){
261 map<string, vector<int> > pairwiseGroupSampleMap;
262 pairwiseGroupSampleMap[itA->first] = itA->second;
263 pairwiseGroupSampleMap[itB->first] = itB->second;
265 runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
268 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
269 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
272 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
274 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
279 m->mothurOutEndLine();
280 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
281 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
282 m->mothurOutEndLine();
286 catch(exception& e) {
287 m->errorOut(e, "AmovaCommand", "execute");
292 //**********************************************************************************************************************
294 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
296 map<string, vector<int> >::iterator it;
298 int numGroups = groupSampleMap.size();
299 int totalNumSamples = 0;
301 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
302 totalNumSamples += it->second.size();
305 double ssTotalOrig = calcSSTotal(groupSampleMap);
306 double ssWithinOrig = calcSSWithin(groupSampleMap);
307 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
310 for(int i=0;i<iters;i++){
311 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
312 double ssWithinRand = calcSSWithin(randomizedGroup);
313 if(ssWithinRand < ssWithinOrig){ counter++; }
316 double pValue = (double)counter / (double) iters;
318 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
319 else { pString = toString(pValue); }
323 it = groupSampleMap.begin();
324 AMOVAFile << it->first;
325 m->mothurOut(it->first);
327 for(it;it!=groupSampleMap.end();it++){
328 AMOVAFile << '-' << it->first;
329 m->mothurOut('-' + it->first);
332 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
333 m->mothurOut("\tAmong\tWithin\tTotal\n");
335 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
336 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
338 int dfAmong = numGroups - 1; double MSAmong = ssAmongOrig / (double) dfAmong;
339 int dfWithin = totalNumSamples - numGroups; double MSWithin = ssWithinOrig / (double) dfWithin;
340 int dfTotal = totalNumSamples - 1; double Fs = MSAmong / MSWithin;
342 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
343 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
345 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
346 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
348 AMOVAFile << "Fs:\t" << Fs << endl;
349 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
351 AMOVAFile << "p-value: " << pString;
352 m->mothurOut("p-value: " + pString);
358 AMOVAFile << endl << endl;
359 m->mothurOutEndLine();m->mothurOutEndLine();
363 catch(exception& e) {
364 m->errorOut(e, "AmovaCommand", "runAMOVA");
369 //**********************************************************************************************************************
371 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
373 vector<int> sampleIndices;
374 vector<int> samplesPerGroup;
376 map<string, vector<int> >::iterator it;
377 for(it=origMapping.begin();it!=origMapping.end();it++){
378 vector<int> indices = it->second;
379 samplesPerGroup.push_back(indices.size());
380 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
383 random_shuffle(sampleIndices.begin(), sampleIndices.end());
386 map<string, vector<int> > randomizedGroups = origMapping;
387 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
388 for(int i=0;i<it->second.size();i++){
389 it->second[i] = sampleIndices[index++];
393 return randomizedGroups;
395 catch (exception& e) {
396 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
401 //**********************************************************************************************************************
403 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
407 map<string, vector<int> >::iterator it;
408 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
409 indices.insert(indices.end(), it->second.begin(), it->second.end());
411 sort(indices.begin(), indices.end());
413 int numIndices =indices.size();
414 double ssTotal = 0.0;
416 for(int i=1;i<numIndices;i++){
417 int row = indices[i];
419 for(int j=0;j<i;j++){
420 ssTotal += distanceMatrix[row][indices[j]];
423 ssTotal /= numIndices;
427 catch(exception& e) {
428 m->errorOut(e, "AmovaCommand", "calcSSTotal");
433 //**********************************************************************************************************************
435 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
438 double ssWithin = 0.0;
440 map<string, vector<int> >::iterator it;
441 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
443 double withinGroup = 0;
445 vector<int> samples = it->second;
447 for(int i=0;i<samples.size();i++){
448 int row = samples[i];
450 for(int j=0;j<samples.size();j++){
451 int col = samples[j];
454 withinGroup += distanceMatrix[row][col];
460 ssWithin += withinGroup / samples.size();
465 catch(exception& e) {
466 m->errorOut(e, "AmovaCommand", "calcSSWithin");
471 //**********************************************************************************************************************