]> git.donarmstrong.com Git - mothur.git/blob - anosimcommand.cpp
reworked amova / homova / anosim
[mothur.git] / anosimcommand.cpp
1 /*
2  *  anosimcommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/14/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "anosimcommand.h"
11 #include "inputdata.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15 vector<string> AnosimCommand::getValidParameters(){     
16         try {
17                 string Array[] =  {"outputdir","iters","phylip","design", "alpha","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "AnosimCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 AnosimCommand::AnosimCommand(){ 
28         try {
29                 abort = true; calledHelp = true; 
30                 vector<string> tempOutNames;
31                 outputTypes["anosim"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> AnosimCommand::getRequiredParameters(){  
40         try {
41                 string Array[] =  {"design"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "AnosimCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> AnosimCommand::getRequiredFiles(){       
52         try {
53                 string Array[] =  {};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "AnosimCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 AnosimCommand::AnosimCommand(string option) {
65         try {
66                 abort = false; calledHelp = false;   
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; calledHelp = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string AlignArray[] =  {"outputdir","iters","phylip","design", "alpha","inputdir"};
74                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         
81                         //check to make sure all parameters are valid for command
82                         map<string,string>::iterator it;
83                         for (it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["anosim"] = tempOutNames;
90                         
91                         //if the user changes the output directory command factory will send this info to us in the output parameter 
92                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
93                         
94                         //if the user changes the input directory command factory will send this info to us in the output parameter 
95                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
96                         if (inputDir == "not found"){   inputDir = "";          }
97                         else {
98                                 string path;
99                                 it = parameters.find("design");
100                                 //user has given a template file
101                                 if(it != parameters.end()){ 
102                                         path = m->hasPath(it->second);
103                                         //if the user has not given a path then, add inputdir. else leave path alone.
104                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
105                                 }
106                                 
107                                 it = parameters.find("phylip");
108                                 //user has given a template file
109                                 if(it != parameters.end()){ 
110                                         path = m->hasPath(it->second);
111                                         //if the user has not given a path then, add inputdir. else leave path alone.
112                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
113                                 }
114                         }
115                         
116                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
117                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
118                         else if (phylipFileName == "not found") { phylipFileName = ""; }        
119                         else if (designFileName == "not found") {
120                                 designFileName = "";
121                                 m->mothurOut("You must provide an phylip file.");
122                                 m->mothurOutEndLine();
123                                 abort = true;
124                         }       
125                         
126                         //check for required parameters
127                         designFileName = validParameter.validFile(parameters, "design", true);
128                         if (designFileName == "not open") { abort = true; }
129                         else if (designFileName == "not found") {
130                                 designFileName = "";
131                                 m->mothurOut("You must provide an design file.");
132                                 m->mothurOutEndLine();
133                                 abort = true;
134                         }       
135                         
136                         string temp = validParameter.validFile(parameters, "iters", false);
137                         if (temp == "not found") { temp = "1000"; }
138                         convert(temp, iters); 
139                         
140                         temp = validParameter.validFile(parameters, "alpha", false);
141                         if (temp == "not found") { temp = "0.05"; }
142                         convert(temp, experimentwiseAlpha); 
143                 }
144                 
145         }
146         catch(exception& e) {
147                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
148                 exit(1);
149         }
150 }
151
152 //**********************************************************************************************************************
153
154 void AnosimCommand::help(){
155         try {
156                 m->mothurOut("Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure.   _Australian Journal of Ecology_ 18, 117-143.\n");
157                 m->mothurOut("The anosim command outputs a .anosim file. \n");
158                 m->mothurOut("The anosim command parameters are phylip, iters, and alpha.  The phylip and design parameters are required.\n");
159                 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n");
160                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
161                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
162                 m->mothurOut("The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n");
163                 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
164                 
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "AnosimCommand", "help");
168                 exit(1);
169         }
170 }
171
172 //**********************************************************************************************************************
173
174 AnosimCommand::~AnosimCommand(){}
175
176 //**********************************************************************************************************************
177
178 int AnosimCommand::execute(){
179         try {
180                 
181                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
182                 
183                 //read design file
184                 designMap = new GroupMap(designFileName);
185                 designMap->readDesignMap();
186                 
187                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
188                 
189                 //read in distance matrix and square it
190                 ReadPhylipVector readMatrix(phylipFileName);
191                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
192                 
193                 for(int i=0;i<distanceMatrix.size();i++){
194                         for(int j=0;j<i;j++){
195                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
196                         }
197                 }
198                 
199                 //link designMap to rows/columns in distance matrix
200                 map<string, vector<int> > origGroupSampleMap;
201                 for(int i=0;i<sampleNames.size();i++){
202                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
203                 }
204                 int numGroups = origGroupSampleMap.size();
205                 
206                 //create a new filename
207                 ofstream ANOSIMFile;
208                 string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "anosim";                               
209                 m->openOutputFile(ANOSIMFileName, ANOSIMFile);
210                 outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
211                 m->mothurOut("\ncomparison\tR-value\tP-value\n");
212                 ANOSIMFile << "comparison\tR-value\tP-value\n";
213                 
214                 
215                 double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha);
216                 
217                 
218                 if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){
219
220                         int numCombos = numGroups * (numGroups-1) / 2;
221                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
222
223                         for(map<string, vector<int> >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
224                                 map<string, vector<int> >::iterator itB = itA;
225                                 itB++;
226                                 for(itB;itB!=origGroupSampleMap.end();itB++){
227                                         
228                                         map<string, vector<int> > subGroupSampleMap;
229                                         
230                                         subGroupSampleMap[itA->first] = itA->second;    string groupA = itA->first;
231                                         subGroupSampleMap[itB->first] = itB->second;    string groupB = itB->first;
232                         
233                                         vector<int> subIndices;
234                                         for(map<string, vector<int> >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){
235                                                 subIndices.insert(subIndices.end(), it->second.begin(), it->second.end());
236                                         }
237                                         int subNumSamples = subIndices.size();
238
239                                         sort(subIndices.begin(), subIndices.end());             
240                                         
241                                         vector<vector<double> > subDistMatrix(distanceMatrix.size());
242                                         for(int i=0;i<distanceMatrix.size();i++){
243                                                 subDistMatrix[i].assign(distanceMatrix.size(), -1);
244                                         }
245
246                                         for(int i=0;i<subNumSamples;i++){
247                                                 for(int j=0;j<i;j++){
248                                                         subDistMatrix[subIndices[i]][subIndices[j]] = distanceMatrix[subIndices[i]][subIndices[j]];
249                                                 }
250                                         }
251
252                                         runANOSIM(ANOSIMFile, subDistMatrix, subGroupSampleMap, pairwiseAlpha);
253
254                                 }
255                         }
256                         
257                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
258                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
259                 }
260                 else{
261                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
262                 }
263                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
264                 ANOSIMFile.close();
265                 
266                         
267                 delete designMap;
268                                 
269                 m->mothurOutEndLine();
270                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
271                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
272                 m->mothurOutEndLine();
273                 
274                 return 0;
275         }
276         catch(exception& e) {
277                 m->errorOut(e, "AnosimCommand", "execute");
278                 exit(1);
279         }
280 }
281 //**********************************************************************************************************************
282
283 double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector<vector<double> > dMatrix, map<string, vector<int> > groupSampleMap, double alpha) {
284         try {
285
286                 
287                 vector<vector<double> > rankMatrix = convertToRanks(dMatrix);
288                 double RValue = calcR(rankMatrix, groupSampleMap);
289                 
290                 int pCount = 0;
291                 for(int i=0;i<iters;i++){
292                         map<string, vector<int> > randGroupSampleMap = getRandomizedGroups(groupSampleMap);
293                         double RValueRand = calcR(rankMatrix, randGroupSampleMap);
294                         if(RValue <= RValueRand){       pCount++;       }
295                 }
296
297                 double pValue = (double)pCount / (double) iters;
298                 string pString = "";
299                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
300                 else                                            {       pString = toString(pValue);                                     }
301                 
302                 
303                 map<string, vector<int> >::iterator it=groupSampleMap.begin();
304                 m->mothurOut(it->first);
305                 ANOSIMFile << it->first;
306                 it++;
307                 for(it;it!=groupSampleMap.end();it++){
308                         m->mothurOut('-' + it->first);
309                         ANOSIMFile << '-' << it->first;
310                 
311                 }
312                 m->mothurOut('\t' + toString(RValue) + '\t' + pString);
313                 ANOSIMFile << '\t' << RValue << '\t' << pString;
314
315                 if(pValue < alpha){
316                         ANOSIMFile << "*";
317                         m->mothurOut("*");
318                 }
319                 ANOSIMFile << endl;
320                 m->mothurOutEndLine();
321                 
322                 return pValue;
323         }
324         catch(exception& e) {
325                 m->errorOut(e, "AnosimCommand", "calcAnisom");
326                 exit(1);
327         }
328 }
329
330 //**********************************************************************************************************************
331
332 double AnosimCommand::calcR(vector<vector<double> > rankMatrix, map<string, vector<int> > groupSampleMap){
333         try {
334
335                 int numSamples = 0;
336                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
337                         numSamples += it->second.size();
338                 }
339                 
340                 
341                 double within = 0.0;
342                 int numWithinComps = 0;         
343                 
344                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
345                         vector<int> indices = it->second;
346                         for(int i=0;i<indices.size();i++){
347                                 for(int j=0;j<i;j++){
348                                         if(indices[i] > indices[j])     {       within += rankMatrix[indices[i]][indices[j]];   }
349                                         else                                            {       within += rankMatrix[indices[j]][indices[i]];   }
350                                         numWithinComps++;
351                                 }
352                         }
353                 }
354                 
355                 within /= (float) numWithinComps;
356                 
357                 double between = 0.0;
358                 int numBetweenComps = 0;
359
360                 map<string, vector<int> >::iterator itB;
361                 
362                 for(map<string, vector<int> >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){
363
364                         for(int i=0;i<itA->second.size();i++){
365                                 int A = itA->second[i];
366                                 map<string, vector<int> >::iterator itB = itA;
367                                 itB++;
368                                 for(itB;itB!=groupSampleMap.end();itB++){
369                                         for(int j=0;j<itB->second.size();j++){
370                                                 int B = itB->second[j];
371                                                 if(A>B) {       between += rankMatrix[A][B];    }
372                                                 else    {       between += rankMatrix[B][A];    }
373                                                 numBetweenComps++;
374                                         }                                       
375                                 }
376                                 
377                         }
378                 }
379                 
380                 
381                 between /= (float) numBetweenComps;
382                 
383                 double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0);
384                                 
385                 return Rvalue;
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
389                 exit(1);
390         }
391 }
392
393 //**********************************************************************************************************************
394
395 vector<vector<double> > AnosimCommand::convertToRanks(vector<vector<double> > dist) {
396         try {
397                 vector<seqDist> cells;
398                 vector<vector<double> > ranks = dist;
399                 
400                 for (int i = 0; i < dist.size(); i++) {
401                         for (int j = 0; j < i; j++) {
402                                 if(dist[i][j] != -1){
403                                         seqDist member(i, j, dist[i][j]);
404                                         cells.push_back(member);
405                                 }
406                         }
407                 }
408                 
409                 
410                 //sort distances
411                 sort(cells.begin(), cells.end(), compareSequenceDistance);      
412
413                 //find ranks of distances
414                 int index = 0;
415                 int indexSum = 0;
416                 for(int i=0;i<cells.size()-1;i++){
417
418                         index = i;
419                         indexSum = i + 1;
420                         while(dist[cells[index].seq1][cells[index].seq2] == dist[cells[index+1].seq1][cells[index+1].seq2]){
421                                 index++;                                
422                                 indexSum += index + 1;
423                         }
424                         
425                         if(index == i){
426                                 ranks[cells[i].seq1][cells[i].seq2] = i+1;
427                         }
428                         else{
429                                 double aveIndex = (double)indexSum / (double)(index - i + 1);
430                                 for(int j=i;j<=index;j++){
431                                         ranks[cells[j].seq1][cells[j].seq2] = aveIndex;
432                                 }                                       
433                                 i = index;
434                         }
435                 }
436                 
437                 if(indexSum == cells.size() - 1){
438                         ranks[cells[cells.size()-1].seq1][cells[cells.size()-1].seq2] = indexSum + 1;
439                 }
440
441                 return ranks;
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "AnosimCommand", "convertToRanks");
445                 exit(1);
446         }
447 }
448
449 //**********************************************************************************************************************
450
451 map<string, vector<int> > AnosimCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
452         try{
453                 vector<int> sampleIndices;
454                 vector<int> samplesPerGroup;
455                 
456                 map<string, vector<int> >::iterator it;
457                 for(it=origMapping.begin();it!=origMapping.end();it++){
458                         vector<int> indices = it->second;
459                         samplesPerGroup.push_back(indices.size());
460                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
461                 }
462                 
463                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
464                 
465                 int index = 0;
466                 map<string, vector<int> > randomizedGroups = origMapping;
467                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
468                         for(int i=0;i<it->second.size();i++){
469                                 it->second[i] = sampleIndices[index++];                         
470                         }
471                 }
472                 
473                 return randomizedGroups;                
474         }
475         catch (exception& e) {
476                 m->errorOut(e, "AnosimCommand", "randomizeGroups");
477                 exit(1);
478         }
479 }
480
481 //**********************************************************************************************************************
482
483
484