]> git.donarmstrong.com Git - mothur.git/blob - anosimcommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / anosimcommand.cpp
1 /*
2  *  anosimcommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/14/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "anosimcommand.h"
11 #include "inputdata.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15 vector<string> AnosimCommand::setParameters(){  
16         try {
17                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
18                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "AnosimCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string AnosimCommand::getHelpString(){  
35         try {
36                 string helpString = "";
37                 helpString += "Referenced: Clarke, K. R. (1993). Non-parametric multivariate analysis of changes in community structure.   _Australian Journal of Ecology_ 18, 117-143.\n";
38                 helpString += "The anosim command outputs a .anosim file. \n";
39                 helpString += "The anosim command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless you have valid current files.\n";
40                 helpString += "The design parameter allows you to assign your samples to groups when you are running anosim. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
43                 helpString += "The anosim command should be in the following format: anosim(phylip=file.dist, design=file.design).\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "AnosimCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string AnosimCommand::getOutputFileNameTag(string type, string inputName=""){   
54         try {
55         string outputFileName = "";
56                 map<string, vector<string> >::iterator it;
57         
58         //is this a type this command creates
59         it = outputTypes.find(type);
60         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
61         else {
62             if (type == "anosim") {  outputFileName =  "anosim"; }
63             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
64         }
65         return outputFileName;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "AnosimCommand", "getOutputFileNameTag");
69                 exit(1);
70         }
71 }
72 //**********************************************************************************************************************
73 AnosimCommand::AnosimCommand(){ 
74         try {
75                 abort = true; calledHelp = true;
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["anosim"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86
87 AnosimCommand::AnosimCommand(string option) {
88         try {
89                 abort = false; calledHelp = false;   
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         
103                         //check to make sure all parameters are valid for command
104                         map<string,string>::iterator it;
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["anosim"] = tempOutNames;
112                         
113                         //if the user changes the output directory command factory will send this info to us in the output parameter 
114                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
115                         
116                         //if the user changes the input directory command factory will send this info to us in the output parameter 
117                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
118                         if (inputDir == "not found"){   inputDir = "";          }
119                         else {
120                                 string path;
121                                 it = parameters.find("design");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
127                                 }
128                                 
129                                 it = parameters.find("phylip");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
135                                 }
136                         }
137                         
138                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
139                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
140                         else if (phylipFileName == "not found") { 
141                                 //if there is a current phylip file, use it
142                                 phylipFileName = m->getPhylipFile(); 
143                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
144                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
145                                 
146                         }else { m->setPhylipFile(phylipFileName); }     
147                         
148                         //check for required parameters
149                         designFileName = validParameter.validFile(parameters, "design", true);
150                         if (designFileName == "not open") { designFileName = ""; abort = true; }
151                         else if (designFileName == "not found") {
152                                 //if there is a current design file, use it
153                                 designFileName = m->getDesignFile(); 
154                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
155                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
156                         }else { m->setDesignFile(designFileName); }     
157                         
158                         string temp = validParameter.validFile(parameters, "iters", false);
159                         if (temp == "not found") { temp = "1000"; }
160                         m->mothurConvert(temp, iters); 
161                         
162                         temp = validParameter.validFile(parameters, "alpha", false);
163                         if (temp == "not found") { temp = "0.05"; }
164                         m->mothurConvert(temp, experimentwiseAlpha); 
165                 }
166                 
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "AnosimCommand", "AnosimCommand");
170                 exit(1);
171         }
172 }
173 //**********************************************************************************************************************
174 int AnosimCommand::execute(){
175         try {
176                 
177                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
178                 
179                 //read design file
180                 designMap = new GroupMap(designFileName);
181                 designMap->readDesignMap();
182                 
183                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
184                 
185                 //read in distance matrix and square it
186                 ReadPhylipVector readMatrix(phylipFileName);
187                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
188                 
189                 for(int i=0;i<distanceMatrix.size();i++){
190                         for(int j=0;j<i;j++){
191                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
192                         }
193                 }
194                 
195                 //link designMap to rows/columns in distance matrix
196                 map<string, vector<int> > origGroupSampleMap;
197                 for(int i=0;i<sampleNames.size();i++){
198                         string group = designMap->getGroup(sampleNames[i]);
199                         
200                         if (group == "not found") {
201                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
202                         }else { origGroupSampleMap[group].push_back(i); }
203                 }
204                 int numGroups = origGroupSampleMap.size();
205                 
206                 if (m->control_pressed) { delete designMap; return 0; }
207                 
208                 //create a new filename
209                 ofstream ANOSIMFile;
210                 string ANOSIMFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("anosim");                          
211                 m->openOutputFile(ANOSIMFileName, ANOSIMFile);
212                 outputNames.push_back(ANOSIMFileName); outputTypes["anosim"].push_back(ANOSIMFileName);
213                 m->mothurOut("\ncomparison\tR-value\tP-value\n");
214                 ANOSIMFile << "comparison\tR-value\tP-value\n";
215                 
216                 
217                 double fullANOSIMPValue = runANOSIM(ANOSIMFile, distanceMatrix, origGroupSampleMap, experimentwiseAlpha);
218                 
219                 
220                 if(fullANOSIMPValue <= experimentwiseAlpha && numGroups > 2){
221
222                         int numCombos = numGroups * (numGroups-1) / 2;
223                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
224
225                         for(map<string, vector<int> >::iterator itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
226                                 map<string, vector<int> >::iterator itB = itA;
227                                 itB++;
228                                 for(itB;itB!=origGroupSampleMap.end();itB++){
229                                         
230                                         map<string, vector<int> > subGroupSampleMap;
231                                         
232                                         subGroupSampleMap[itA->first] = itA->second;    string groupA = itA->first;
233                                         subGroupSampleMap[itB->first] = itB->second;    string groupB = itB->first;
234                         
235                                         vector<int> subIndices;
236                                         for(map<string, vector<int> >::iterator it=subGroupSampleMap.begin();it!=subGroupSampleMap.end();it++){
237                                                 subIndices.insert(subIndices.end(), it->second.begin(), it->second.end());
238                                         }
239                                         int subNumSamples = subIndices.size();
240
241                                         sort(subIndices.begin(), subIndices.end());             
242                                         
243                                         vector<vector<double> > subDistMatrix(distanceMatrix.size());
244                                         for(int i=0;i<distanceMatrix.size();i++){
245                                                 subDistMatrix[i].assign(distanceMatrix.size(), -1);
246                                         }
247
248                                         for(int i=0;i<subNumSamples;i++){
249                                                 for(int j=0;j<i;j++){
250                                                         subDistMatrix[subIndices[i]][subIndices[j]] = distanceMatrix[subIndices[i]][subIndices[j]];
251                                                 }
252                                         }
253
254                                         runANOSIM(ANOSIMFile, subDistMatrix, subGroupSampleMap, pairwiseAlpha);
255
256                                 }
257                         }
258                         
259                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
260                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
261                 }
262                 else{
263                         m->mothurOut("\nExperiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
264                 }
265                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
266                 ANOSIMFile.close();
267                 
268                         
269                 delete designMap;
270                                 
271                 m->mothurOutEndLine();
272                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
273                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
274                 m->mothurOutEndLine();
275                 
276                 return 0;
277         }
278         catch(exception& e) {
279                 m->errorOut(e, "AnosimCommand", "execute");
280                 exit(1);
281         }
282 }
283 //**********************************************************************************************************************
284
285 double AnosimCommand::runANOSIM(ofstream& ANOSIMFile, vector<vector<double> > dMatrix, map<string, vector<int> > groupSampleMap, double alpha) {
286         try {
287
288                 
289                 vector<vector<double> > rankMatrix = convertToRanks(dMatrix);
290                 double RValue = calcR(rankMatrix, groupSampleMap);
291                 
292                 int pCount = 0;
293                 for(int i=0;i<iters;i++){
294                         map<string, vector<int> > randGroupSampleMap = getRandomizedGroups(groupSampleMap);
295                         double RValueRand = calcR(rankMatrix, randGroupSampleMap);
296                         if(RValue <= RValueRand){       pCount++;       }
297                 }
298
299                 double pValue = (double)pCount / (double) iters;
300                 string pString = "";
301                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
302                 else                                            {       pString = toString(pValue);                                     }
303                 
304                 
305                 map<string, vector<int> >::iterator it=groupSampleMap.begin();
306                 m->mothurOut(it->first);
307                 ANOSIMFile << it->first;
308                 it++;
309                 for(it;it!=groupSampleMap.end();it++){
310                         m->mothurOut('-' + it->first);
311                         ANOSIMFile << '-' << it->first;
312                 
313                 }
314                 m->mothurOut('\t' + toString(RValue) + '\t' + pString);
315                 ANOSIMFile << '\t' << RValue << '\t' << pString;
316
317                 if(pValue < alpha){
318                         ANOSIMFile << "*";
319                         m->mothurOut("*");
320                 }
321                 ANOSIMFile << endl;
322                 m->mothurOutEndLine();
323                 
324                 return pValue;
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "AnosimCommand", "calcAnisom");
328                 exit(1);
329         }
330 }
331
332 //**********************************************************************************************************************
333
334 double AnosimCommand::calcR(vector<vector<double> > rankMatrix, map<string, vector<int> > groupSampleMap){
335         try {
336
337                 int numSamples = 0;
338                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
339                         numSamples += it->second.size();
340                 }
341                 
342                 
343                 double within = 0.0;
344                 int numWithinComps = 0;         
345                 
346                 for(map<string, vector<int> >::iterator it=groupSampleMap.begin();it!=groupSampleMap.end();it++){
347                         vector<int> indices = it->second;
348                         for(int i=0;i<indices.size();i++){
349                                 for(int j=0;j<i;j++){
350                                         if(indices[i] > indices[j])     {       within += rankMatrix[indices[i]][indices[j]];   }
351                                         else                                            {       within += rankMatrix[indices[j]][indices[i]];   }
352                                         numWithinComps++;
353                                 }
354                         }
355                 }
356                 
357                 within /= (float) numWithinComps;
358                 
359                 double between = 0.0;
360                 int numBetweenComps = 0;
361
362                 map<string, vector<int> >::iterator itB;
363                 
364                 for(map<string, vector<int> >::iterator itA=groupSampleMap.begin();itA!=groupSampleMap.end();itA++){
365
366                         for(int i=0;i<itA->second.size();i++){
367                                 int A = itA->second[i];
368                                 map<string, vector<int> >::iterator itB = itA;
369                                 itB++;
370                                 for(itB;itB!=groupSampleMap.end();itB++){
371                                         for(int j=0;j<itB->second.size();j++){
372                                                 int B = itB->second[j];
373                                                 if(A>B) {       between += rankMatrix[A][B];    }
374                                                 else    {       between += rankMatrix[B][A];    }
375                                                 numBetweenComps++;
376                                         }                                       
377                                 }
378                                 
379                         }
380                 }
381                 
382                 
383                 between /= (float) numBetweenComps;
384                 
385                 double Rvalue = (between - within)/(numSamples * (numSamples-1) / 4.0);
386                                 
387                 return Rvalue;
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "AnosimCommand", "calcWithinBetween");
391                 exit(1);
392         }
393 }
394
395 //**********************************************************************************************************************
396
397 vector<vector<double> > AnosimCommand::convertToRanks(vector<vector<double> > dist) {
398         try {
399                 vector<seqDist> cells;
400                 vector<vector<double> > ranks = dist;
401                 
402                 for (int i = 0; i < dist.size(); i++) {
403                         for (int j = 0; j < i; j++) {
404                                 if(dist[i][j] != -1){
405                                         seqDist member(i, j, dist[i][j]);
406                                         cells.push_back(member);
407                                 }
408                         }
409                 }
410                 
411                 
412                 //sort distances
413                 sort(cells.begin(), cells.end(), compareSequenceDistance);      
414
415                 //find ranks of distances
416                 int index = 0;
417                 int indexSum = 0;
418                 for(int i=0;i<cells.size()-1;i++){
419
420                         index = i;
421                         indexSum = i + 1;
422                         while(dist[cells[index].seq1][cells[index].seq2] == dist[cells[index+1].seq1][cells[index+1].seq2]){
423                                 index++;                                
424                                 indexSum += index + 1;
425                         }
426                         
427                         if(index == i){
428                                 ranks[cells[i].seq1][cells[i].seq2] = i+1;
429                         }
430                         else{
431                                 double aveIndex = (double)indexSum / (double)(index - i + 1);
432                                 for(int j=i;j<=index;j++){
433                                         ranks[cells[j].seq1][cells[j].seq2] = aveIndex;
434                                 }                                       
435                                 i = index;
436                         }
437                 }
438                 
439                 if(indexSum == cells.size() - 1){
440                         ranks[cells[cells.size()-1].seq1][cells[cells.size()-1].seq2] = indexSum + 1;
441                 }
442
443                 return ranks;
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "AnosimCommand", "convertToRanks");
447                 exit(1);
448         }
449 }
450
451 //**********************************************************************************************************************
452
453 map<string, vector<int> > AnosimCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
454         try{
455                 vector<int> sampleIndices;
456                 vector<int> samplesPerGroup;
457                 
458                 map<string, vector<int> >::iterator it;
459                 for(it=origMapping.begin();it!=origMapping.end();it++){
460                         vector<int> indices = it->second;
461                         samplesPerGroup.push_back(indices.size());
462                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
463                 }
464                 
465                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
466                 
467                 int index = 0;
468                 map<string, vector<int> > randomizedGroups = origMapping;
469                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
470                         for(int i=0;i<it->second.size();i++){
471                                 it->second[i] = sampleIndices[index++];                         
472                         }
473                 }
474                 
475                 return randomizedGroups;                
476         }
477         catch (exception& e) {
478                 m->errorOut(e, "AnosimCommand", "randomizeGroups");
479                 exit(1);
480         }
481 }
482
483 //**********************************************************************************************************************
484
485
486