]> git.donarmstrong.com Git - mothur.git/blob - amovacommand.cpp
added sets to amova and homova commands. added oligos to make.contigs. added metadat...
[mothur.git] / amovacommand.cpp
1 /*
2  *  amovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/7/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "amovacommand.h"
11 #include "readphylipvector.h"
12 #include "groupmap.h"
13 #include "sharedutilities.h"
14
15
16 //**********************************************************************************************************************
17 vector<string> AmovaCommand::setParameters(){   
18         try {
19                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
20         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
21                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26         
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "AmovaCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string AmovaCommand::getHelpString(){   
38         try {
39                 string helpString = "";
40                 helpString += "Referenced: Anderson MJ (2001). A new method for non-parametric multivariate analysis of variance. Austral Ecol 26: 32-46.";
41                 helpString += "The amova command outputs a .amova file.";
42                 helpString += "The amova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless you have valid current files.";
43                 helpString += "The design parameter allows you to assign your samples to groups when you are running amova. It is required.";
44                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.";
45         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
46                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000.";
47                 helpString += "The amova command should be in the following format: amova(phylip=file.dist, design=file.design).";
48                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "AmovaCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string AmovaCommand::getOutputFileNameTag(string type, string inputName=""){    
58         try {
59         string tag = "";
60                 map<string, vector<string> >::iterator it;
61         
62         //is this a type this command creates
63         it = outputTypes.find(type);
64         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65         else {
66             if (type == "amova") {  tag = "amova"; }
67             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file.\n");  }
68         }
69         return tag;
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "AmovaCommand", "getOutputFileNameTag");
73                 exit(1);
74         }
75 }
76 //**********************************************************************************************************************
77 AmovaCommand::AmovaCommand(){   
78         try {
79                 abort = true; calledHelp = true; 
80                 setParameters();
81                 vector<string> tempOutNames;
82                 outputTypes["amova"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 AmovaCommand::AmovaCommand(string option) {
91         try {
92                 abort = false; calledHelp = false;   
93                 
94                 //allow user to run help
95                 if(option == "help") { help(); abort = true; calledHelp = true; }
96                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97                 
98                 else {
99                         vector<string> myArray = setParameters();
100                         
101                         OptionParser parser(option);
102                         map<string,string> parameters = parser.getParameters();
103                         
104                         ValidParameters validParameter;
105                         
106                         //check to make sure all parameters are valid for command
107                         map<string,string>::iterator it;
108                         for (it = parameters.begin(); it != parameters.end(); it++) { 
109                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
110                         }
111                         
112                         //initialize outputTypes
113                         vector<string> tempOutNames;
114                         outputTypes["amova"] = tempOutNames;
115                         
116                         //if the user changes the output directory command factory will send this info to us in the output parameter 
117                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
118                         
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("design");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("phylip");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
138                                 }
139                         }
140                         
141                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
142                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
143                         else if (phylipFileName == "not found") { 
144                                 //if there is a current phylip file, use it
145                                 phylipFileName = m->getPhylipFile(); 
146                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
148                         }else { m->setPhylipFile(phylipFileName); }
149                         
150                         //check for required parameters
151                         designFileName = validParameter.validFile(parameters, "design", true);
152                         if (designFileName == "not open") { designFileName = ""; abort = true; }
153                         else if (designFileName == "not found") {
154                                 //if there is a current design file, use it
155                                 designFileName = m->getDesignFile(); 
156                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
157                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                           
158                         }else { m->setDesignFile(designFileName); }     
159
160                         string temp = validParameter.validFile(parameters, "iters", false);
161                         if (temp == "not found") { temp = "1000"; }
162                         m->mothurConvert(temp, iters); 
163                         
164                         temp = validParameter.validFile(parameters, "alpha", false);
165                         if (temp == "not found") { temp = "0.05"; }
166                         m->mothurConvert(temp, experimentwiseAlpha); 
167             
168             string sets = validParameter.validFile(parameters, "sets", false);                  
169                         if (sets == "not found") { sets = ""; }
170                         else { 
171                                 m->splitAtDash(sets, Sets);
172                         }
173                 }
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "AmovaCommand", "AmovaCommand");
177                 exit(1);
178         }
179 }
180 //**********************************************************************************************************************
181
182 int AmovaCommand::execute(){
183         try {
184                 
185                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
186                 
187                 //read design file
188                 designMap = new GroupMap(designFileName);
189                 designMap->readDesignMap();
190
191                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
192                                                 
193                 //read in distance matrix and square it
194                 ReadPhylipVector readMatrix(phylipFileName);
195                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
196         
197         if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
198             SharedUtil util; 
199             vector<string> dGroups = designMap->getNamesOfGroups();
200             util.setGroups(Sets, dGroups);  
201
202             for(int i=0;i<distanceMatrix.size();i++){
203                 
204                 if (m->control_pressed) { delete designMap; return 0; }
205                 
206                 string group = designMap->getGroup(sampleNames[i]);
207                 
208                 if (group == "not found") {
209                     m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
210                 }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
211                     //remove from all other rows
212                     for(int j=0;j<distanceMatrix.size();j++){
213                         distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
214                     }
215                     distanceMatrix.erase(distanceMatrix.begin()+i);
216                     sampleNames.erase(sampleNames.begin()+i);
217                     i--;
218                 }
219             }
220         }
221         
222                 for(int i=0;i<distanceMatrix.size();i++){
223                         for(int j=0;j<i;j++){
224                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
225                         }
226                 }
227                 
228                 //link designMap to rows/columns in distance matrix
229                 map<string, vector<int> > origGroupSampleMap;
230                 for(int i=0;i<sampleNames.size();i++){
231                         string group = designMap->getGroup(sampleNames[i]);
232                         
233                         if (group == "not found") {
234                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
235                         }else { origGroupSampleMap[group].push_back(i); }
236                         
237                 }
238                 int numGroups = origGroupSampleMap.size();
239                 
240                 if (m->control_pressed) { delete designMap; return 0; }
241                 
242                 //create a new filename
243                 ofstream AMOVAFile;
244                 string AMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("amova");                            
245                 m->openOutputFile(AMOVAFileName, AMOVAFile);
246                 outputNames.push_back(AMOVAFileName); outputTypes["amova"].push_back(AMOVAFileName);
247                 
248                 double fullANOVAPValue = runAMOVA(AMOVAFile, origGroupSampleMap, experimentwiseAlpha);
249                 if(fullANOVAPValue <= experimentwiseAlpha && numGroups > 2){
250                         
251                         int numCombos = numGroups * (numGroups-1) / 2;
252                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
253                         
254                         map<string, vector<int> >::iterator itA;
255                         map<string, vector<int> >::iterator itB;
256                         
257                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
258                                 itB = itA;itB++;
259                                 for(itB;itB!=origGroupSampleMap.end();itB++){
260                                         
261                                         map<string, vector<int> > pairwiseGroupSampleMap;
262                                         pairwiseGroupSampleMap[itA->first] = itA->second;
263                                         pairwiseGroupSampleMap[itB->first] = itB->second;
264                                         
265                                         runAMOVA(AMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
266                                 }                       
267                         }
268                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
269                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
270                 }
271                 else{
272                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
273                 }
274                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
275                 AMOVAFile.close();
276                 
277                 delete designMap;
278          
279                 m->mothurOutEndLine();
280                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
281                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
282                 m->mothurOutEndLine();
283                 
284                 return 0;
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "AmovaCommand", "execute");
288                 exit(1);
289         }
290 }
291
292 //**********************************************************************************************************************
293
294 double AmovaCommand::runAMOVA(ofstream& AMOVAFile, map<string, vector<int> > groupSampleMap, double alpha) {
295         try {
296                 map<string, vector<int> >::iterator it;
297
298                 int numGroups = groupSampleMap.size();
299                 int totalNumSamples = 0;
300
301                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
302                         totalNumSamples += it->second.size();                   
303                 }
304
305                 double ssTotalOrig = calcSSTotal(groupSampleMap);
306                 double ssWithinOrig = calcSSWithin(groupSampleMap);
307                 double ssAmongOrig = ssTotalOrig - ssWithinOrig;
308                 
309                 double counter = 0;
310                 for(int i=0;i<iters;i++){
311                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
312                         double ssWithinRand = calcSSWithin(randomizedGroup);
313                         if(ssWithinRand < ssWithinOrig){        counter++;      }
314                 }
315                 
316                 double pValue = (double)counter / (double) iters;
317                 string pString = "";
318                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
319                 else                                            {       pString = toString(pValue);                                     }
320                 
321                 
322                 //print anova table
323                 it = groupSampleMap.begin();
324                 AMOVAFile << it->first;
325                 m->mothurOut(it->first);
326                 it++;
327                 for(it;it!=groupSampleMap.end();it++){
328                         AMOVAFile << '-' << it->first;
329                         m->mothurOut('-' + it->first);
330                 }
331                 
332                 AMOVAFile << "\tAmong\tWithin\tTotal" << endl;
333                 m->mothurOut("\tAmong\tWithin\tTotal\n");
334                 
335                 AMOVAFile << "SS\t" << ssAmongOrig << '\t' << ssWithinOrig << '\t' << ssTotalOrig << endl;
336                 m->mothurOut("SS\t" + toString(ssAmongOrig) + '\t' + toString(ssWithinOrig) + '\t' + toString(ssTotalOrig) + '\n');
337                 
338                 int dfAmong = numGroups - 1;                            double MSAmong = ssAmongOrig / (double) dfAmong;
339                 int dfWithin = totalNumSamples - numGroups;     double MSWithin = ssWithinOrig / (double) dfWithin;
340                 int dfTotal = totalNumSamples - 1;                      double Fs = MSAmong / MSWithin;
341                 
342                 AMOVAFile << "df\t" << dfAmong << '\t' << dfWithin << '\t' << dfTotal << endl;
343                 m->mothurOut("df\t" + toString(dfAmong) + '\t' + toString(dfWithin) + '\t' + toString(dfTotal) + '\n');
344
345                 AMOVAFile << "MS\t" << MSAmong << '\t' << MSWithin << endl << endl;
346                 m->mothurOut("MS\t" + toString(MSAmong) + '\t' + toString(MSWithin) + "\n\n");
347
348                 AMOVAFile << "Fs:\t" << Fs << endl;
349                 m->mothurOut("Fs:\t" + toString(Fs) + '\n');
350                 
351                 AMOVAFile << "p-value: " << pString;
352                 m->mothurOut("p-value: " + pString);
353
354                 if(pValue < alpha){
355                         AMOVAFile << "*";
356                         m->mothurOut("*");
357                 }
358                 AMOVAFile << endl << endl;
359                 m->mothurOutEndLine();m->mothurOutEndLine();
360
361                 return pValue;
362         }
363         catch(exception& e) {
364                 m->errorOut(e, "AmovaCommand", "runAMOVA");
365                 exit(1);
366         }
367 }
368
369 //**********************************************************************************************************************
370
371 map<string, vector<int> > AmovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
372         try{
373                 vector<int> sampleIndices;
374                 vector<int> samplesPerGroup;
375                 
376                 map<string, vector<int> >::iterator it;
377                 for(it=origMapping.begin();it!=origMapping.end();it++){
378                         vector<int> indices = it->second;
379                         samplesPerGroup.push_back(indices.size());
380                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
381                 }
382                 
383                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
384                 
385                 int index = 0;
386                 map<string, vector<int> > randomizedGroups = origMapping;
387                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
388                         for(int i=0;i<it->second.size();i++){
389                                 it->second[i] = sampleIndices[index++];                         
390                         }
391                 }
392
393                 return randomizedGroups;                
394         }
395         catch (exception& e) {
396                 m->errorOut(e, "AmovaCommand", "getRandomizedGroups");
397                 exit(1);
398         }
399 }
400
401 //**********************************************************************************************************************
402
403 double AmovaCommand::calcSSTotal(map<string, vector<int> >& groupSampleMap) {
404         try {
405                 
406                 vector<int> indices;
407                 map<string, vector<int> >::iterator it;
408                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
409                         indices.insert(indices.end(), it->second.begin(), it->second.end());                    
410                 }
411                 sort(indices.begin(), indices.end());
412                         
413                 int numIndices =indices.size();
414                 double ssTotal = 0.0;
415                 
416                 for(int i=1;i<numIndices;i++){
417                         int row = indices[i];
418                         
419                         for(int j=0;j<i;j++){
420                                 ssTotal += distanceMatrix[row][indices[j]];
421                         }
422                 }
423                 ssTotal /= numIndices;
424                         
425                 return ssTotal;
426         }
427         catch(exception& e) {
428                 m->errorOut(e, "AmovaCommand", "calcSSTotal");
429                 exit(1);
430         }
431 }
432
433 //**********************************************************************************************************************
434
435 double AmovaCommand::calcSSWithin(map<string, vector<int> >& groupSampleMap) {
436         try {
437
438                 double ssWithin = 0.0;
439                 
440                 map<string, vector<int> >::iterator it;
441                 for(it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
442                         
443                         double withinGroup = 0;
444                         
445                         vector<int> samples = it->second;
446                         
447                         for(int i=0;i<samples.size();i++){
448                                 int row = samples[i];
449
450                                 for(int j=0;j<samples.size();j++){
451                                         int col = samples[j];
452
453                                         if(col < row){
454                                                 withinGroup += distanceMatrix[row][col];
455                                         }
456                                         
457                                 }
458                         }
459
460                         ssWithin += withinGroup / samples.size();
461                 }
462
463                 return ssWithin;
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "AmovaCommand", "calcSSWithin");
467                 exit(1);
468         }
469 }
470
471 //**********************************************************************************************************************