]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
adding current file class
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
12 #include "groupmap.h"
13
14 //**********************************************************************************************************************
15 vector<string> AmovaCommand::getValidParameters(){      
16         try {
17                 string Array[] =  {"outputdir","iters","phylip","design","alpha", "inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "AmovaCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 AmovaCommand::AmovaCommand(){   
28         try {
29                 abort = true; calledHelp = true; 
30                 vector<string> tempOutNames;
31                 outputTypes["amova"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> AmovaCommand::getRequiredParameters(){   
40         try {
41                 string Array[] =  {"design"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "AmovaCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> AmovaCommand::getRequiredFiles(){        
52         try {
53                 string Array[] =  {};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "AmovaCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 AmovaCommand::AmovaCommand(string option) {
65         try {
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string AlignArray[] =  {"design","outputdir","iters","phylip","alpha", "inputdir"};
74                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         
81                         //check to make sure all parameters are valid for command
82                         map<string,string>::iterator it;
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["amova"] = tempOutNames;
90                         
91                         //if the user changes the output directory command factory will send this info to us in the output parameter 
92                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
93                         
94                         //if the user changes the input directory command factory will send this info to us in the output parameter 
95                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
96                         if (inputDir == "not found"){   inputDir = "";          }
97                         else {
98                                 string path;
99                                 it = parameters.find("design");
100                                 //user has given a template file
101                                 if(it != parameters.end()){ 
102                                         path = m->hasPath(it->second);
103                                         //if the user has not given a path then, add inputdir. else leave path alone.
104                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
105                                 }
106                                 
107                                 it = parameters.find("phylip");
108                                 //user has given a template file
109                                 if(it != parameters.end()){ 
110                                         path = m->hasPath(it->second);
111                                         //if the user has not given a path then, add inputdir. else leave path alone.
112                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
113                                 }
114                         }
115                         
116                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
117                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
118                         else if (phylipFileName == "not found") { 
119                                 phylipFileName = ""; 
120                                 
121                                 //check currentFiles for a phylip file
122                                 //if (currentFiles->getPhylipFile() != "") {  phylipFileName = currentFiles->getPhylipFile(); m->mothurOut("Using " + phylipFileName + " as phylip file."); m->mothurOutEndLine();
123                                 //}else { m->mothurOut("You must provide an phylip file."); m->mothurOutEndLine(); abort = true;  }
124                         }       
125                         
126                         //check for required parameters
127                         designFileName = validParameter.validFile(parameters, "design", true);
128                         if (designFileName == "not open") { abort = true; }
129                         else if (designFileName == "not found") {
130                                 designFileName = "";
131                                 
132                                 //check currentFiles for a design file
133                                 //if (currentFiles->getDesignFile() != "") {  designFileName = currentFiles->getDesignFile(); m->mothurOut("Using " + designFileName + " as design file."); m->mothurOutEndLine();
134                                 //}else { m->mothurOut("You must provide an design file."); m->mothurOutEndLine(); abort = true;  }
135                         }       
136
137                         string temp = validParameter.validFile(parameters, "iters", false);
138                         if (temp == "not found") { temp = "1000"; }
139                         convert(temp, iters); 
140                         
141                         temp = validParameter.validFile(parameters, "alpha", false);
142                         if (temp == "not found") { temp = "0.05"; }
143                         convert(temp, experimentwiseAlpha); 
144                 }
145         }
146         catch(exception& e) {
147                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
148                 exit(1);
149         }
150 }
151
152 //**********************************************************************************************************************
153
154 void AmovaCommand::help(){
155         try {
156                 m->mothurOut("Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.\n");
157                 m->mothurOut("The amova command outputs a .amova file. \n");
158                 m->mothurOut("The amova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required.\n");
159                 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running amova. It is required. \n");
160                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
161                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
162                 m->mothurOut("The amova command should be in the following format: amova(phylip=file.dist, design=file.design).\n");
163                 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
164         }
165         catch(exception& e) {
166                 m->errorOut(e, "AmovaCommand", "help");
167                 exit(1);
168         }
169 }
170
171 //**********************************************************************************************************************
172
173 AmovaCommand::~AmovaCommand(){}
174
175 //**********************************************************************************************************************
176
177 int AmovaCommand::execute(){
178         try {
179                 
180                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
181                 
182                 //read design file
183                 designMap = new GroupMap(designFileName);
184                 designMap->readDesignMap();
185
186                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
187                                                 
188                 //read in distance matrix and square it
189                 ReadPhylipVector readMatrix(phylipFileName);
190                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
191                 
192                 for(int i=0;i<distanceMatrix.size();i++){
193                         for(int j=0;j<i;j++){
194                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
195                         }
196                 }
197                 
198                 //link designMap to rows/columns in distance matrix
199                 map<string, vector<int> > origGroupSampleMap;
200                 for(int i=0;i<sampleNames.size();i++){
201                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
202                 }
203                 int numGroups = origGroupSampleMap.size();
204                 
205                 //create a new filename
206                 ofstream AMOVAFile;
207                 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "amova";                         
208                 m->openOutputFile(AMOVAFileName, AMOVAFile);
209                 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
210                 
211                 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
212                 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
213                         
214                         int numCombos = numGroups * (numGroups-1) / 2;
215                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
216                         
217                         map<string, vector<int> >::iterator itA;
218                         map<string, vector<int> >::iterator itB;
219                         
220                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
221                                 itB = itA;itB++;
222                                 for(itB;itB!=origGroupSampleMap.end();itB++){
223                                         
224                                         map<string, vector<int> > pairwiseGroupSampleMap;
225                                         pairwiseGroupSampleMap[itA->first] = itA->second;
226                                         pairwiseGroupSampleMap[itB->first] = itB->second;
227                                         
228                                         runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
229                                 }                       
230                         }
231                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
232                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
233                 }
234                 else{
235                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
236                 }
237                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
238                 AMOVAFile.close();
239                 
240                 delete designMap;
241          
242                 m->mothurOutEndLine();
243                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
244                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
245                 m->mothurOutEndLine();
246                 
247                 return 0;
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "AmovaCommand", "execute");
251                 exit(1);
252         }
253 }
254
255 //**********************************************************************************************************************
256
257 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
258         try {
259                 map<string, vector<int> >::iterator it;
260
261                 int numGroups = groupSampleMap.size();
262                 int totalNumSamples = 0;
263
264                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
265                         totalNumSamples += it->second.size();                   
266                 }
267
268                 double ssTotalOrig = calcSSTotal(groupSampleMap);
269                 double ssWithinOrig = calcSSWithin(groupSampleMap);
270                 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
271                 
272                 double counter = 0;
273                 for(int i=0;i<iters;i++){
274                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
275                         double ssWithinRand = calcSSWithin(randomizedGroup);
276                         if(ssWithinRand < ssWithinOrig){        counter++;      }
277                 }
278                 
279                 double pValue = (double)counter / (double) iters;
280                 string pString = "";
281                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
282                 else                                            {       pString = toString(pValue);                                     }
283                 
284                 
285                 //print anova table
286                 it = groupSampleMap.begin();
287                 AMOVAFile << it->first;
288                 m->mothurOut(it->first);
289                 it++;
290                 for(it;it!=groupSampleMap.end();it++){
291                         AMOVAFile << '-' << it->first;
292                         m->mothurOut('-' + it->first);
293                 }
294                 
295                 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
296                 m->mothurOut("\tAmong\tWithin\tTotal\n");
297                 
298                 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
299                 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
300                 
301                 int dfAmong = numGroups - 1;                            double MSAmong = ssAmongOrig / (double) dfAmong;
302                 int dfWithin = totalNumSamples - numGroups;     double MSWithin = ssWithinOrig / (double) dfWithin;
303                 int dfTotal = totalNumSamples - 1;                      double Fs = MSAmong / MSWithin;
304                 
305                 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
306                 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
307
308                 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
309                 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
310
311                 AMOVAFile << "Fs:\t" << Fs << endl;
312                 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
313                 
314                 AMOVAFile << "p-value: " << pString;
315                 m->mothurOut("p-value: " + pString);
316
317                 if(pValue < alpha){
318                         AMOVAFile << "*";
319                         m->mothurOut("*");
320                 }
321                 AMOVAFile << endl << endl;
322                 m->mothurOutEndLine();m->mothurOutEndLine();
323
324                 return pValue;
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "AmovaCommand", "runAMOVA");
328                 exit(1);
329         }
330 }
331
332 //**********************************************************************************************************************
333
334 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
335         try{
336                 vector<int> sampleIndices;
337                 vector<int> samplesPerGroup;
338                 
339                 map<string, vector<int> >::iterator it;
340                 for(it=origMapping.begin();it!=origMapping.end();it++){
341                         vector<int> indices = it->second;
342                         samplesPerGroup.push_back(indices.size());
343                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
344                 }
345                 
346                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
347                 
348                 int index = 0;
349                 map<string, vector<int> > randomizedGroups = origMapping;
350                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
351                         for(int i=0;i<it->second.size();i++){
352                                 it->second[i] = sampleIndices[index++];                         
353                         }
354                 }
355
356                 return randomizedGroups;                
357         }
358         catch (exception& e) {
359                 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
360                 exit(1);
361         }
362 }
363
364 //**********************************************************************************************************************
365
366 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
367         try {
368                 
369                 vector<int> indices;
370                 map<string, vector<int> >::iterator it;
371                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
372                         indices.insert(indices.end(), it->second.begin(), it->second.end());                    
373                 }
374                 sort(indices.begin(), indices.end());
375                         
376                 int numIndices =indices.size();
377                 double ssTotal = 0.0;
378                 
379                 for(int i=1;i<numIndices;i++){
380                         int row = indices[i];
381                         
382                         for(int j=0;j<i;j++){
383                                 ssTotal += distanceMatrix[row][indices[j]];
384                         }
385                 }
386                 ssTotal /= numIndices;
387                         
388                 return ssTotal;
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "AmovaCommand", "calcSSTotal");
392                 exit(1);
393         }
394 }
395
396 //**********************************************************************************************************************
397
398 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
399         try {
400
401                 double ssWithin = 0.0;
402                 
403                 map<string, vector<int> >::iterator it;
404                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
405                         
406                         double withinGroup = 0;
407                         
408                         vector<int> samples = it->second;
409                         
410                         for(int i=0;i<samples.size();i++){
411                                 int row = samples[i];
412
413                                 for(int j=0;j<samples.size();j++){
414                                         int col = samples[j];
415
416                                         if(col < row){
417                                                 withinGroup += distanceMatrix[row][col];
418                                         }
419                                         
420                                 }
421                         }
422
423                         ssWithin += withinGroup / samples.size();
424                 }
425
426                 return ssWithin;
427         }
428         catch(exception& e) {
429                 m->errorOut(e, "AmovaCommand", "calcSSWithin");
430                 exit(1);
431         }
432 }
433
434 //**********************************************************************************************************************