]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
added modify names parameter to set.dir
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
12 #include "groupmap.h"
13 #include "sharedutilities.h"
14
15
16 //**********************************************************************************************************************
17 vector<string> AmovaCommand::setParameters(){   
18         try {
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pdesign);
20         CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
21                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","amova",false,true,true); parameters.push_back(pphylip);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
23                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26         
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "AmovaCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string AmovaCommand::getHelpString(){   
38         try {
39                 string helpString = "";
40                 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
41                 helpString += "The amova command outputs a .amova file.";
42                 helpString += "The amova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless you have valid current files.";
43                 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
44                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
45         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
46                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000.";
47                 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
48                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "AmovaCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string AmovaCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60         
61         if (type == "amova") {  pattern = "[filename],amova"; } //makes file like: amazon.align
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
63         
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "AmovaCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 AmovaCommand::AmovaCommand(){   
73         try {
74                 abort = true; calledHelp = true; 
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["amova"] = tempOutNames;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
81                 exit(1);
82         }
83 }
84 //**********************************************************************************************************************
85 AmovaCommand::AmovaCommand(string option) {
86         try {
87                 abort = false; calledHelp = false;   
88                 
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         
101                         //check to make sure all parameters are valid for command
102                         map<string,string>::iterator it;
103                         for (it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["amova"] = tempOutNames;
110                         
111                         //if the user changes the output directory command factory will send this info to us in the output parameter 
112                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("design");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
125                                 }
126                                 
127                                 it = parameters.find("phylip");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
133                                 }
134                         }
135                         
136                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
137                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
138                         else if (phylipFileName == "not found") { 
139                                 //if there is a current phylip file, use it
140                                 phylipFileName = m->getPhylipFile(); 
141                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
142                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
143                         }else { m->setPhylipFile(phylipFileName); }
144                         
145                         //check for required parameters
146                         designFileName = validParameter.validFile(parameters, "design", true);
147                         if (designFileName == "not open") { designFileName = ""; abort = true; }
148                         else if (designFileName == "not found") {
149                                 //if there is a current design file, use it
150                                 designFileName = m->getDesignFile(); 
151                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
152                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                           
153                         }else { m->setDesignFile(designFileName); }     
154
155                         string temp = validParameter.validFile(parameters, "iters", false);
156                         if (temp == "not found") { temp = "1000"; }
157                         m->mothurConvert(temp, iters); 
158                         
159                         temp = validParameter.validFile(parameters, "alpha", false);
160                         if (temp == "not found") { temp = "0.05"; }
161                         m->mothurConvert(temp, experimentwiseAlpha); 
162             
163             string sets = validParameter.validFile(parameters, "sets", false);                  
164                         if (sets == "not found") { sets = ""; }
165                         else { 
166                                 m->splitAtDash(sets, Sets);
167                         }
168                 }
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
172                 exit(1);
173         }
174 }
175 //**********************************************************************************************************************
176
177 int AmovaCommand::execute(){
178         try {
179                 
180                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
181                 
182                 //read design file
183                 designMap = new GroupMap(designFileName);
184                 designMap->readDesignMap();
185
186                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
187                                                 
188                 //read in distance matrix and square it
189                 ReadPhylipVector readMatrix(phylipFileName);
190                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
191         
192         if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
193             SharedUtil util; 
194             vector<string> dGroups = designMap->getNamesOfGroups();
195             util.setGroups(Sets, dGroups);  
196
197             for(int i=0;i<distanceMatrix.size();i++){
198                 
199                 if (m->control_pressed) { delete designMap; return 0; }
200                 
201                 string group = designMap->getGroup(sampleNames[i]);
202                 
203                 if (group == "not found") {
204                     m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
205                 }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
206                     //remove from all other rows
207                     for(int j=0;j<distanceMatrix.size();j++){
208                         distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
209                     }
210                     distanceMatrix.erase(distanceMatrix.begin()+i);
211                     sampleNames.erase(sampleNames.begin()+i);
212                     i--;
213                 }
214             }
215         }
216         
217                 for(int i=0;i<distanceMatrix.size();i++){
218                         for(int j=0;j<i;j++){
219                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
220                         }
221                 }
222                 
223                 //link designMap to rows/columns in distance matrix
224                 map<string, vector<int> > origGroupSampleMap;
225                 for(int i=0;i<sampleNames.size();i++){
226                         string group = designMap->getGroup(sampleNames[i]);
227                         
228                         if (group == "not found") {
229                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
230                         }else { origGroupSampleMap[group].push_back(i); }
231                         
232                 }
233                 int numGroups = origGroupSampleMap.size();
234                 
235                 if (m->control_pressed) { delete designMap; return 0; }
236                 
237                 //create a new filename
238                 ofstream AMOVAFile;
239         map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
240                 string AMOVAFileName = getOutputFileName("amova", variables);   
241         
242                 m->openOutputFile(AMOVAFileName, AMOVAFile);
243                 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
244                 
245                 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
246                 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
247                         
248                         int numCombos = numGroups * (numGroups-1) / 2;
249                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
250                         
251                         map<string, vector<int> >::iterator itA;
252                         map<string, vector<int> >::iterator itB;
253                         
254                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
255                                 itB = itA;itB++;
256                                 for(itB;itB!=origGroupSampleMap.end();itB++){
257                                         
258                                         map<string, vector<int> > pairwiseGroupSampleMap;
259                                         pairwiseGroupSampleMap[itA->first] = itA->second;
260                                         pairwiseGroupSampleMap[itB->first] = itB->second;
261                                         
262                                         runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
263                                 }                       
264                         }
265                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
266                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
267                 }
268                 else{
269                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
270                 }
271                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
272                 AMOVAFile.close();
273                 
274                 delete designMap;
275          
276                 m->mothurOutEndLine();
277                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
279                 m->mothurOutEndLine();
280                 
281                 return 0;
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "AmovaCommand", "execute");
285                 exit(1);
286         }
287 }
288
289 //**********************************************************************************************************************
290
291 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
292         try {
293                 map<string, vector<int> >::iterator it;
294
295                 int numGroups = groupSampleMap.size();
296                 int totalNumSamples = 0;
297
298                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
299                         totalNumSamples += it->second.size();                   
300                 }
301
302                 double ssTotalOrig = calcSSTotal(groupSampleMap);
303                 double ssWithinOrig = calcSSWithin(groupSampleMap);
304                 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
305                 
306                 double counter = 0;
307                 for(int i=0;i<iters;i++){
308                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
309                         double ssWithinRand = calcSSWithin(randomizedGroup);
310                         if(ssWithinRand <= ssWithinOrig){       counter++;      }
311                 }
312                 
313                 double pValue = (double)counter / (double) iters;
314                 string pString = "";
315                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
316                 else                                            {       pString = toString(pValue);                                     }
317                 
318                 
319                 //print anova table
320                 it = groupSampleMap.begin();
321                 AMOVAFile << it->first;
322                 m->mothurOut(it->first);
323                 it++;
324                 for(it;it!=groupSampleMap.end();it++){
325                         AMOVAFile << '-' << it->first;
326                         m->mothurOut('-' + it->first);
327                 }
328                 
329                 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
330                 m->mothurOut("\tAmong\tWithin\tTotal\n");
331                 
332                 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
333                 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
334                 
335                 int dfAmong = numGroups - 1;                            double MSAmong = ssAmongOrig / (double) dfAmong;
336                 int dfWithin = totalNumSamples - numGroups;     double MSWithin = ssWithinOrig / (double) dfWithin;
337                 int dfTotal = totalNumSamples - 1;                      double Fs = MSAmong / MSWithin;
338                 
339                 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
340                 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
341
342                 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
343                 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
344
345                 AMOVAFile << "Fs:\t" << Fs << endl;
346                 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
347                 
348                 AMOVAFile << "p-value: " << pString;
349                 m->mothurOut("p-value: " + pString);
350
351                 if(pValue < alpha){
352                         AMOVAFile << "*";
353                         m->mothurOut("*");
354                 }
355                 AMOVAFile << endl << endl;
356                 m->mothurOutEndLine();m->mothurOutEndLine();
357
358                 return pValue;
359         }
360         catch(exception& e) {
361                 m->errorOut(e, "AmovaCommand", "runAMOVA");
362                 exit(1);
363         }
364 }
365
366 //**********************************************************************************************************************
367
368 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
369         try{
370                 vector<int> sampleIndices;
371                 vector<int> samplesPerGroup;
372                 
373                 map<string, vector<int> >::iterator it;
374                 for(it=origMapping.begin();it!=origMapping.end();it++){
375                         vector<int> indices = it->second;
376                         samplesPerGroup.push_back(indices.size());
377                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
378                 }
379                 
380                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
381                 
382                 int index = 0;
383                 map<string, vector<int> > randomizedGroups = origMapping;
384                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
385                         for(int i=0;i<it->second.size();i++){
386                                 it->second[i] = sampleIndices[index++];                         
387                         }
388                 }
389
390                 return randomizedGroups;                
391         }
392         catch (exception& e) {
393                 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
394                 exit(1);
395         }
396 }
397
398 //**********************************************************************************************************************
399
400 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
401         try {
402                 
403                 vector<int> indices;
404                 map<string, vector<int> >::iterator it;
405                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
406                         indices.insert(indices.end(), it->second.begin(), it->second.end());                    
407                 }
408                 sort(indices.begin(), indices.end());
409                         
410                 int numIndices =indices.size();
411                 double ssTotal = 0.0;
412                 
413                 for(int i=1;i<numIndices;i++){
414                         int row = indices[i];
415                         
416                         for(int j=0;j<i;j++){
417                                 ssTotal += distanceMatrix[row][indices[j]];
418                         }
419                 }
420                 ssTotal /= numIndices;
421                         
422                 return ssTotal;
423         }
424         catch(exception& e) {
425                 m->errorOut(e, "AmovaCommand", "calcSSTotal");
426                 exit(1);
427         }
428 }
429
430 //**********************************************************************************************************************
431
432 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
433         try {
434
435                 double ssWithin = 0.0;
436                 
437                 map<string, vector<int> >::iterator it;
438                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
439                         
440                         double withinGroup = 0;
441                         
442                         vector<int> samples = it->second;
443                         
444                         for(int i=0;i<samples.size();i++){
445                                 int row = samples[i];
446
447                                 for(int j=0;j<samples.size();j++){
448                                         int col = samples[j];
449
450                                         if(col < row){
451                                                 withinGroup += distanceMatrix[row][col];
452                                         }
453                                         
454                                 }
455                         }
456
457                         ssWithin += withinGroup / samples.size();
458                 }
459
460                 return ssWithin;
461         }
462         catch(exception& e) {
463                 m->errorOut(e, "AmovaCommand", "calcSSWithin");
464                 exit(1);
465         }
466 }
467
468 //**********************************************************************************************************************