]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
12 #include "groupmap.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> AmovaCommand::setParameters(){   
17         try {
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
20                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
21                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24         
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "AmovaCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string AmovaCommand::getHelpString(){   
36         try {
37                 string helpString = "";
38                 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
39                 helpString += "The amova command outputs a .amova file.";
40                 helpString += "The amova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless you have valid current files.";
41                 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
42                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
43                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000.";
44                 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
45                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
46                 return helpString;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "AmovaCommand", "getHelpString");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 string AmovaCommand::getOutputFileNameTag(string type, string inputName=""){    
55         try {
56         string tag = "";
57                 map<string, vector<string> >::iterator it;
58         
59         //is this a type this command creates
60         it = outputTypes.find(type);
61         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
62         else {
63             if (type == "amova") {  tag = "amova"; }
64             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file.\n");  }
65         }
66         return tag;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "AmovaCommand", "getOutputFileNameTag");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74 AmovaCommand::AmovaCommand(){   
75         try {
76                 abort = true; calledHelp = true; 
77                 setParameters();
78                 vector<string> tempOutNames;
79                 outputTypes["amova"] = tempOutNames;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 AmovaCommand::AmovaCommand(string option) {
88         try {
89                 abort = false; calledHelp = false;   
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         
103                         //check to make sure all parameters are valid for command
104                         map<string,string>::iterator it;
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["amova"] = tempOutNames;
112                         
113                         //if the user changes the output directory command factory will send this info to us in the output parameter 
114                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
115                         
116                         //if the user changes the input directory command factory will send this info to us in the output parameter 
117                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
118                         if (inputDir == "not found"){   inputDir = "";          }
119                         else {
120                                 string path;
121                                 it = parameters.find("design");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
127                                 }
128                                 
129                                 it = parameters.find("phylip");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
135                                 }
136                         }
137                         
138                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
139                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
140                         else if (phylipFileName == "not found") { 
141                                 //if there is a current phylip file, use it
142                                 phylipFileName = m->getPhylipFile(); 
143                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
144                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
145                         }else { m->setPhylipFile(phylipFileName); }
146                         
147                         //check for required parameters
148                         designFileName = validParameter.validFile(parameters, "design", true);
149                         if (designFileName == "not open") { designFileName = ""; abort = true; }
150                         else if (designFileName == "not found") {
151                                 //if there is a current design file, use it
152                                 designFileName = m->getDesignFile(); 
153                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
154                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                           
155                         }else { m->setDesignFile(designFileName); }     
156
157                         string temp = validParameter.validFile(parameters, "iters", false);
158                         if (temp == "not found") { temp = "1000"; }
159                         m->mothurConvert(temp, iters); 
160                         
161                         temp = validParameter.validFile(parameters, "alpha", false);
162                         if (temp == "not found") { temp = "0.05"; }
163                         m->mothurConvert(temp, experimentwiseAlpha); 
164                 }
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
168                 exit(1);
169         }
170 }
171 //**********************************************************************************************************************
172
173 int AmovaCommand::execute(){
174         try {
175                 
176                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
177                 
178                 //read design file
179                 designMap = new GroupMap(designFileName);
180                 designMap->readDesignMap();
181
182                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
183                                                 
184                 //read in distance matrix and square it
185                 ReadPhylipVector readMatrix(phylipFileName);
186                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
187                 
188                 for(int i=0;i<distanceMatrix.size();i++){
189                         for(int j=0;j<i;j++){
190                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
191                         }
192                 }
193                 
194                 //link designMap to rows/columns in distance matrix
195                 map<string, vector<int> > origGroupSampleMap;
196                 for(int i=0;i<sampleNames.size();i++){
197                         string group = designMap->getGroup(sampleNames[i]);
198                         
199                         if (group == "not found") {
200                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
201                         }else { origGroupSampleMap[group].push_back(i); }
202                         
203                 }
204                 int numGroups = origGroupSampleMap.size();
205                 
206                 if (m->control_pressed) { delete designMap; return 0; }
207                 
208                 //create a new filename
209                 ofstream AMOVAFile;
210                 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("amova");                            
211                 m->openOutputFile(AMOVAFileName, AMOVAFile);
212                 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
213                 
214                 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
215                 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
216                         
217                         int numCombos = numGroups * (numGroups-1) / 2;
218                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
219                         
220                         map<string, vector<int> >::iterator itA;
221                         map<string, vector<int> >::iterator itB;
222                         
223                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
224                                 itB = itA;itB++;
225                                 for(itB;itB!=origGroupSampleMap.end();itB++){
226                                         
227                                         map<string, vector<int> > pairwiseGroupSampleMap;
228                                         pairwiseGroupSampleMap[itA->first] = itA->second;
229                                         pairwiseGroupSampleMap[itB->first] = itB->second;
230                                         
231                                         runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
232                                 }                       
233                         }
234                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
235                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
236                 }
237                 else{
238                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
239                 }
240                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
241                 AMOVAFile.close();
242                 
243                 delete designMap;
244          
245                 m->mothurOutEndLine();
246                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
247                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
248                 m->mothurOutEndLine();
249                 
250                 return 0;
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "AmovaCommand", "execute");
254                 exit(1);
255         }
256 }
257
258 //**********************************************************************************************************************
259
260 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
261         try {
262                 map<string, vector<int> >::iterator it;
263
264                 int numGroups = groupSampleMap.size();
265                 int totalNumSamples = 0;
266
267                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
268                         totalNumSamples += it->second.size();                   
269                 }
270
271                 double ssTotalOrig = calcSSTotal(groupSampleMap);
272                 double ssWithinOrig = calcSSWithin(groupSampleMap);
273                 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
274                 
275                 double counter = 0;
276                 for(int i=0;i<iters;i++){
277                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
278                         double ssWithinRand = calcSSWithin(randomizedGroup);
279                         if(ssWithinRand < ssWithinOrig){        counter++;      }
280                 }
281                 
282                 double pValue = (double)counter / (double) iters;
283                 string pString = "";
284                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
285                 else                                            {       pString = toString(pValue);                                     }
286                 
287                 
288                 //print anova table
289                 it = groupSampleMap.begin();
290                 AMOVAFile << it->first;
291                 m->mothurOut(it->first);
292                 it++;
293                 for(it;it!=groupSampleMap.end();it++){
294                         AMOVAFile << '-' << it->first;
295                         m->mothurOut('-' + it->first);
296                 }
297                 
298                 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
299                 m->mothurOut("\tAmong\tWithin\tTotal\n");
300                 
301                 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
302                 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
303                 
304                 int dfAmong = numGroups - 1;                            double MSAmong = ssAmongOrig / (double) dfAmong;
305                 int dfWithin = totalNumSamples - numGroups;     double MSWithin = ssWithinOrig / (double) dfWithin;
306                 int dfTotal = totalNumSamples - 1;                      double Fs = MSAmong / MSWithin;
307                 
308                 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
309                 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
310
311                 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
312                 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
313
314                 AMOVAFile << "Fs:\t" << Fs << endl;
315                 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
316                 
317                 AMOVAFile << "p-value: " << pString;
318                 m->mothurOut("p-value: " + pString);
319
320                 if(pValue < alpha){
321                         AMOVAFile << "*";
322                         m->mothurOut("*");
323                 }
324                 AMOVAFile << endl << endl;
325                 m->mothurOutEndLine();m->mothurOutEndLine();
326
327                 return pValue;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "AmovaCommand", "runAMOVA");
331                 exit(1);
332         }
333 }
334
335 //**********************************************************************************************************************
336
337 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
338         try{
339                 vector<int> sampleIndices;
340                 vector<int> samplesPerGroup;
341                 
342                 map<string, vector<int> >::iterator it;
343                 for(it=origMapping.begin();it!=origMapping.end();it++){
344                         vector<int> indices = it->second;
345                         samplesPerGroup.push_back(indices.size());
346                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
347                 }
348                 
349                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
350                 
351                 int index = 0;
352                 map<string, vector<int> > randomizedGroups = origMapping;
353                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
354                         for(int i=0;i<it->second.size();i++){
355                                 it->second[i] = sampleIndices[index++];                         
356                         }
357                 }
358
359                 return randomizedGroups;                
360         }
361         catch (exception& e) {
362                 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
363                 exit(1);
364         }
365 }
366
367 //**********************************************************************************************************************
368
369 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
370         try {
371                 
372                 vector<int> indices;
373                 map<string, vector<int> >::iterator it;
374                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
375                         indices.insert(indices.end(), it->second.begin(), it->second.end());                    
376                 }
377                 sort(indices.begin(), indices.end());
378                         
379                 int numIndices =indices.size();
380                 double ssTotal = 0.0;
381                 
382                 for(int i=1;i<numIndices;i++){
383                         int row = indices[i];
384                         
385                         for(int j=0;j<i;j++){
386                                 ssTotal += distanceMatrix[row][indices[j]];
387                         }
388                 }
389                 ssTotal /= numIndices;
390                         
391                 return ssTotal;
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "AmovaCommand", "calcSSTotal");
395                 exit(1);
396         }
397 }
398
399 //**********************************************************************************************************************
400
401 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
402         try {
403
404                 double ssWithin = 0.0;
405                 
406                 map<string, vector<int> >::iterator it;
407                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
408                         
409                         double withinGroup = 0;
410                         
411                         vector<int> samples = it->second;
412                         
413                         for(int i=0;i<samples.size();i++){
414                                 int row = samples[i];
415
416                                 for(int j=0;j<samples.size();j++){
417                                         int col = samples[j];
418
419                                         if(col < row){
420                                                 withinGroup += distanceMatrix[row][col];
421                                         }
422                                         
423                                 }
424                         }
425
426                         ssWithin += withinGroup / samples.size();
427                 }
428
429                 return ssWithin;
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "AmovaCommand", "calcSSWithin");
433                 exit(1);
434         }
435 }
436
437 //**********************************************************************************************************************