]> git.donarmstrong.com Git - mothur.git/blob - anosimcommand.cpp
added citation function to commands
[mothur.git] / anosimcommand.cpp
1 /*
2  *  anosimcommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/14/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "anosimcommand.h"
11 #include "inputdata.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15 vector<string> AnosimCommand::setParameters(){  
16         try {
17                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
18                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "AnosimCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string AnosimCommand::getHelpString(){  
35         try {
36                 string helpString = "";
37                 helpString += "Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure.   _Australian Journal of Ecology_ 18, 117-143.\n";
38                 helpString += "The anosim command outputs a .anosim file. \n";
39                 helpString += "The anosim command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless you have valid current files.\n";
40                 helpString += "The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
43                 helpString += "The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "AnosimCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52
53 //**********************************************************************************************************************
54 AnosimCommand::AnosimCommand(){ 
55         try {
56                 abort = true; calledHelp = true;
57                 setParameters();
58                 vector<string> tempOutNames;
59                 outputTypes["anosim"] = tempOutNames;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67
68 AnosimCommand::AnosimCommand(string option) {
69         try {
70                 abort = false; calledHelp = false;   
71                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; calledHelp = true; }
74                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
75                 
76                 else {
77                         vector<string> myArray = setParameters();
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         
84                         //check to make sure all parameters are valid for command
85                         map<string,string>::iterator it;
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92                         outputTypes["anosim"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("design");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
108                                 }
109                                 
110                                 it = parameters.find("phylip");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
116                                 }
117                         }
118                         
119                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
120                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
121                         else if (phylipFileName == "not found") { 
122                                 //if there is a current phylip file, use it
123                                 phylipFileName = m->getPhylipFile(); 
124                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
125                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
126                                 
127                         }       
128                         
129                         //check for required parameters
130                         designFileName = validParameter.validFile(parameters, "design", true);
131                         if (designFileName == "not open") { abort = true; }
132                         else if (designFileName == "not found") {
133                                 //if there is a current design file, use it
134                                 designFileName = m->getDesignFile(); 
135                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
136                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
137                         }       
138                         
139                         string temp = validParameter.validFile(parameters, "iters", false);
140                         if (temp == "not found") { temp = "1000"; }
141                         convert(temp, iters); 
142                         
143                         temp = validParameter.validFile(parameters, "alpha", false);
144                         if (temp == "not found") { temp = "0.05"; }
145                         convert(temp, experimentwiseAlpha); 
146                 }
147                 
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
151                 exit(1);
152         }
153 }
154 //**********************************************************************************************************************
155 int AnosimCommand::execute(){
156         try {
157                 
158                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
159                 
160                 //read design file
161                 designMap = new GroupMap(designFileName);
162                 designMap->readDesignMap();
163                 
164                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
165                 
166                 //read in distance matrix and square it
167                 ReadPhylipVector readMatrix(phylipFileName);
168                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
169                 
170                 for(int i=0;i<distanceMatrix.size();i++){
171                         for(int j=0;j<i;j++){
172                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
173                         }
174                 }
175                 
176                 //link designMap to rows/columns in distance matrix
177                 map<string, vector<int> > origGroupSampleMap;
178                 for(int i=0;i<sampleNames.size();i++){
179                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
180                 }
181                 int numGroups = origGroupSampleMap.size();
182                 
183                 //create a new filename
184                 ofstream ANOSIMFile;
185                 string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "anosim";                               
186                 m->openOutputFile(ANOSIMFileName, ANOSIMFile);
187                 outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
188                 m->mothurOut("\ncomparison\tR-value\tP-value\n");
189                 ANOSIMFile << "comparison\tR-value\tP-value\n";
190                 
191                 
192                 double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha);
193                 
194                 
195                 if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){
196
197                         int numCombos = numGroups * (numGroups-1) / 2;
198                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
199
200                         for(map<string, vector<int> >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
201                                 map<string, vector<int> >::iterator itB = itA;
202                                 itB++;
203                                 for(itB;itB!=origGroupSampleMap.end();itB++){
204                                         
205                                         map<string, vector<int> > subGroupSampleMap;
206                                         
207                                         subGroupSampleMap[itA->first] = itA->second;    string groupA = itA->first;
208                                         subGroupSampleMap[itB->first] = itB->second;    string groupB = itB->first;
209                         
210                                         vector<int> subIndices;
211                                         for(map<string, vector<int> >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){
212                                                 subIndices.insert(subIndices.end(), it->second.begin(), it->second.end());
213                                         }
214                                         int subNumSamples = subIndices.size();
215
216                                         sort(subIndices.begin(), subIndices.end());             
217                                         
218                                         vector<vector<double> > subDistMatrix(distanceMatrix.size());
219                                         for(int i=0;i<distanceMatrix.size();i++){
220                                                 subDistMatrix[i].assign(distanceMatrix.size(), -1);
221                                         }
222
223                                         for(int i=0;i<subNumSamples;i++){
224                                                 for(int j=0;j<i;j++){
225                                                         subDistMatrix[subIndices[i]][subIndices[j]] = distanceMatrix[subIndices[i]][subIndices[j]];
226                                                 }
227                                         }
228
229                                         runANOSIM(ANOSIMFile, subDistMatrix, subGroupSampleMap, pairwiseAlpha);
230
231                                 }
232                         }
233                         
234                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
235                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
236                 }
237                 else{
238                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
239                 }
240                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
241                 ANOSIMFile.close();
242                 
243                         
244                 delete designMap;
245                                 
246                 m->mothurOutEndLine();
247                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
248                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
249                 m->mothurOutEndLine();
250                 
251                 return 0;
252         }
253         catch(exception& e) {
254                 m->errorOut(e, "AnosimCommand", "execute");
255                 exit(1);
256         }
257 }
258 //**********************************************************************************************************************
259
260 double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector<vector<double> > dMatrix, map<string, vector<int> > groupSampleMap, double alpha) {
261         try {
262
263                 
264                 vector<vector<double> > rankMatrix = convertToRanks(dMatrix);
265                 double RValue = calcR(rankMatrix, groupSampleMap);
266                 
267                 int pCount = 0;
268                 for(int i=0;i<iters;i++){
269                         map<string, vector<int> > randGroupSampleMap = getRandomizedGroups(groupSampleMap);
270                         double RValueRand = calcR(rankMatrix, randGroupSampleMap);
271                         if(RValue <= RValueRand){       pCount++;       }
272                 }
273
274                 double pValue = (double)pCount / (double) iters;
275                 string pString = "";
276                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
277                 else                                            {       pString = toString(pValue);                                     }
278                 
279                 
280                 map<string, vector<int> >::iterator it=groupSampleMap.begin();
281                 m->mothurOut(it->first);
282                 ANOSIMFile << it->first;
283                 it++;
284                 for(it;it!=groupSampleMap.end();it++){
285                         m->mothurOut('-' + it->first);
286                         ANOSIMFile << '-' << it->first;
287                 
288                 }
289                 m->mothurOut('\t' + toString(RValue) + '\t' + pString);
290                 ANOSIMFile << '\t' << RValue << '\t' << pString;
291
292                 if(pValue < alpha){
293                         ANOSIMFile << "*";
294                         m->mothurOut("*");
295                 }
296                 ANOSIMFile << endl;
297                 m->mothurOutEndLine();
298                 
299                 return pValue;
300         }
301         catch(exception& e) {
302                 m->errorOut(e, "AnosimCommand", "calcAnisom");
303                 exit(1);
304         }
305 }
306
307 //**********************************************************************************************************************
308
309 double AnosimCommand::calcR(vector<vector<double> > rankMatrix, map<string, vector<int> > groupSampleMap){
310         try {
311
312                 int numSamples = 0;
313                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
314                         numSamples += it->second.size();
315                 }
316                 
317                 
318                 double within = 0.0;
319                 int numWithinComps = 0;         
320                 
321                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
322                         vector<int> indices = it->second;
323                         for(int i=0;i<indices.size();i++){
324                                 for(int j=0;j<i;j++){
325                                         if(indices[i] > indices[j])     {       within += rankMatrix[indices[i]][indices[j]];   }
326                                         else                                            {       within += rankMatrix[indices[j]][indices[i]];   }
327                                         numWithinComps++;
328                                 }
329                         }
330                 }
331                 
332                 within /= (float) numWithinComps;
333                 
334                 double between = 0.0;
335                 int numBetweenComps = 0;
336
337                 map<string, vector<int> >::iterator itB;
338                 
339                 for(map<string, vector<int> >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){
340
341                         for(int i=0;i<itA->second.size();i++){
342                                 int A = itA->second[i];
343                                 map<string, vector<int> >::iterator itB = itA;
344                                 itB++;
345                                 for(itB;itB!=groupSampleMap.end();itB++){
346                                         for(int j=0;j<itB->second.size();j++){
347                                                 int B = itB->second[j];
348                                                 if(A>B) {       between += rankMatrix[A][B];    }
349                                                 else    {       between += rankMatrix[B][A];    }
350                                                 numBetweenComps++;
351                                         }                                       
352                                 }
353                                 
354                         }
355                 }
356                 
357                 
358                 between /= (float) numBetweenComps;
359                 
360                 double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0);
361                                 
362                 return Rvalue;
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
366                 exit(1);
367         }
368 }
369
370 //**********************************************************************************************************************
371
372 vector<vector<double> > AnosimCommand::convertToRanks(vector<vector<double> > dist) {
373         try {
374                 vector<seqDist> cells;
375                 vector<vector<double> > ranks = dist;
376                 
377                 for (int i = 0; i < dist.size(); i++) {
378                         for (int j = 0; j < i; j++) {
379                                 if(dist[i][j] != -1){
380                                         seqDist member(i, j, dist[i][j]);
381                                         cells.push_back(member);
382                                 }
383                         }
384                 }
385                 
386                 
387                 //sort distances
388                 sort(cells.begin(), cells.end(), compareSequenceDistance);      
389
390                 //find ranks of distances
391                 int index = 0;
392                 int indexSum = 0;
393                 for(int i=0;i<cells.size()-1;i++){
394
395                         index = i;
396                         indexSum = i + 1;
397                         while(dist[cells[index].seq1][cells[index].seq2] == dist[cells[index+1].seq1][cells[index+1].seq2]){
398                                 index++;                                
399                                 indexSum += index + 1;
400                         }
401                         
402                         if(index == i){
403                                 ranks[cells[i].seq1][cells[i].seq2] = i+1;
404                         }
405                         else{
406                                 double aveIndex = (double)indexSum / (double)(index - i + 1);
407                                 for(int j=i;j<=index;j++){
408                                         ranks[cells[j].seq1][cells[j].seq2] = aveIndex;
409                                 }                                       
410                                 i = index;
411                         }
412                 }
413                 
414                 if(indexSum == cells.size() - 1){
415                         ranks[cells[cells.size()-1].seq1][cells[cells.size()-1].seq2] = indexSum + 1;
416                 }
417
418                 return ranks;
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "AnosimCommand", "convertToRanks");
422                 exit(1);
423         }
424 }
425
426 //**********************************************************************************************************************
427
428 map<string, vector<int> > AnosimCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
429         try{
430                 vector<int> sampleIndices;
431                 vector<int> samplesPerGroup;
432                 
433                 map<string, vector<int> >::iterator it;
434                 for(it=origMapping.begin();it!=origMapping.end();it++){
435                         vector<int> indices = it->second;
436                         samplesPerGroup.push_back(indices.size());
437                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
438                 }
439                 
440                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
441                 
442                 int index = 0;
443                 map<string, vector<int> > randomizedGroups = origMapping;
444                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
445                         for(int i=0;i<it->second.size();i++){
446                                 it->second[i] = sampleIndices[index++];                         
447                         }
448                 }
449                 
450                 return randomizedGroups;                
451         }
452         catch (exception& e) {
453                 m->errorOut(e, "AnosimCommand", "randomizeGroups");
454                 exit(1);
455         }
456 }
457
458 //**********************************************************************************************************************
459
460
461