]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
some minor changes
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "groupmap.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15
16 vector<string> HomovaCommand::getValidParameters(){     
17         try {
18                 string Array[] =  {"outputdir","iters","phylip","design","alpha", "inputdir"};
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "HomovaCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27
28 //**********************************************************************************************************************
29
30 HomovaCommand::HomovaCommand(){ 
31         try {
32                 abort = true; calledHelp = true; 
33                 vector<string> tempOutNames;
34                 outputTypes["homova"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
38                 exit(1);
39         }
40 }
41
42 //**********************************************************************************************************************
43
44 vector<string> HomovaCommand::getRequiredParameters(){  
45         try {
46                 string Array[] =  {"design"};
47                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "HomovaCommand", "getRequiredParameters");
52                 exit(1);
53         }
54 }
55
56 //**********************************************************************************************************************
57
58 vector<string> HomovaCommand::getRequiredFiles(){       
59         try {
60                 string Array[] =  {};
61                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62                 return myArray;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "HomovaCommand", "getRequiredFiles");
66                 exit(1);
67         }
68 }
69
70 //**********************************************************************************************************************
71
72 HomovaCommand::HomovaCommand(string option) {
73         try {
74                 abort = false; calledHelp = false;   
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 
79                 else {
80                         //valid paramters for this command
81                         string AlignArray[] =  {"design","outputdir","iters","phylip","alpha", "inputdir"};
82                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
83                         
84                         OptionParser parser(option);
85                         map<string,string> parameters = parser.getParameters();
86                         
87                         ValidParameters validParameter;
88                         
89                         //check to make sure all parameters are valid for command
90                         map<string,string>::iterator it;
91                         for (it = parameters.begin(); it != parameters.end(); it++) { 
92                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
93                         }
94                         
95                         //initialize outputTypes
96                         vector<string> tempOutNames;
97                         outputTypes["homova"] = tempOutNames;
98                         
99                         //if the user changes the output directory command factory will send this info to us in the output parameter 
100                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
101                         
102                         //if the user changes the input directory command factory will send this info to us in the output parameter 
103                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
104                         if (inputDir == "not found"){   inputDir = "";          }
105                         else {
106                                 string path;
107                                 it = parameters.find("design");
108                                 //user has given a template file
109                                 if(it != parameters.end()){ 
110                                         path = m->hasPath(it->second);
111                                         //if the user has not given a path then, add inputdir. else leave path alone.
112                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
113                                 }
114                                 
115                                 it = parameters.find("phylip");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
121                                 }
122                         }
123                         
124                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
125                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
126                         else if (phylipFileName == "not found") { phylipFileName = ""; }        
127                         else if (designFileName == "not found") {
128                                 designFileName = "";
129                                 m->mothurOut("You must provide an phylip file.");
130                                 m->mothurOutEndLine();
131                                 abort = true;
132                         }       
133                         
134                         //check for required parameters
135                         designFileName = validParameter.validFile(parameters, "design", true);
136                         if (designFileName == "not open") { abort = true; }
137                         else if (designFileName == "not found") {
138                                 designFileName = "";
139                                 m->mothurOut("You must provide an design file.");
140                                 m->mothurOutEndLine();
141                                 abort = true;
142                         }       
143                         
144                         string temp = validParameter.validFile(parameters, "iters", false);
145                         if (temp == "not found") { temp = "1000"; }
146                         convert(temp, iters); 
147                         
148                         temp = validParameter.validFile(parameters, "alpha", false);
149                         if (temp == "not found") { temp = "0.05"; }
150                         convert(temp, experimentwiseAlpha); 
151                 }
152                 
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
156                 exit(1);
157         }
158 }
159
160 //**********************************************************************************************************************
161
162 void HomovaCommand::help(){
163         try {
164                 m->mothurOut("Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n");
165                 m->mothurOut("The homova command outputs a .homova file. \n");
166                 m->mothurOut("The homova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required.\n");
167                 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n");
168                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
169                 m->mothurOut("The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n");
170                 m->mothurOut("The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n");
171                 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "HomovaCommand", "help");
175                 exit(1);
176         }
177 }
178
179 //**********************************************************************************************************************
180
181 HomovaCommand::~HomovaCommand(){}
182
183 //**********************************************************************************************************************
184
185 int HomovaCommand::execute(){
186         try {
187                 
188                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
189                 
190                 //read design file
191                 designMap = new GroupMap(designFileName);
192                 designMap->readDesignMap();
193                 
194                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
195                 
196                 //read in distance matrix and square it
197                 ReadPhylipVector readMatrix(phylipFileName);
198                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
199                 
200                 for(int i=0;i<distanceMatrix.size();i++){
201                         for(int j=0;j<i;j++){
202                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
203                         }
204                 }
205                 
206                 //link designMap to rows/columns in distance matrix
207                 map<string, vector<int> > origGroupSampleMap;
208                 for(int i=0;i<sampleNames.size();i++){
209                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
210                 }
211                 int numGroups = origGroupSampleMap.size();
212                 
213                 //create a new filename
214                 ofstream HOMOVAFile;
215                 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "homova";                               
216                 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
217                 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
218                 
219                 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
220                 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
221                 
222                 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
223
224                 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
225                         
226                         int numCombos = numGroups * (numGroups-1) / 2;
227                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
228                         
229                         map<string, vector<int> >::iterator itA;
230                         map<string, vector<int> >::iterator itB;
231                         
232                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
233                                 itB = itA;itB++;
234                                 for(;itB!=origGroupSampleMap.end();itB++){
235                                         map<string, vector<int> > pairwiseGroupSampleMap;
236                                         pairwiseGroupSampleMap[itA->first] = itA->second;
237                                         pairwiseGroupSampleMap[itB->first] = itB->second;
238                                         
239                                         runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
240                                 }                       
241                         }
242                         HOMOVAFile << endl;
243                         m->mothurOutEndLine();
244                         
245                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
246                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
247                 }
248                 else{
249                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
250                 }
251                 
252                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
253                 
254                 delete designMap;
255                 
256                 m->mothurOutEndLine();
257                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
258                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
259                 m->mothurOutEndLine();
260                 
261                 return 0;
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "HomovaCommand", "execute");
265                 exit(1);
266         }
267 }
268
269 //**********************************************************************************************************************
270
271 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
272         try {
273                 map<string, vector<int> >::iterator it;
274                 int numGroups = groupSampleMap.size();
275                 
276                 vector<double> ssWithinOrigVector;
277                 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
278                 
279                 double counter = 0;
280                 for(int i=0;i<iters;i++){
281                         vector<double> ssWithinRandVector;
282                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
283                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
284                         if(bValueRand > bValueOrig){    counter++;      }
285                 }
286                 
287                 double pValue = (double) counter / (double) iters;
288                 string pString = "";
289                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
290                 else                                            {       pString = toString(pValue);                                     }
291                 
292                 
293                 //print homova table
294                 it = groupSampleMap.begin();
295                 HOMOVAFile << it->first;
296                 m->mothurOut(it->first);
297                 it++;
298                 for(;it!=groupSampleMap.end();it++){
299                         HOMOVAFile << '-' << it->first;
300                         m->mothurOut('-' + it->first);
301                 }
302
303                 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
304                 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
305                 
306                 if(pValue < alpha){
307                         HOMOVAFile << "*";
308                         m->mothurOut("*");
309                 }
310
311                 for(int i=0;i<numGroups;i++){
312                         HOMOVAFile << '\t' << ssWithinOrigVector[i];
313                         m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
314                 }
315                 HOMOVAFile << endl;
316                 m->mothurOutEndLine();
317                 
318                 return pValue;  
319         }
320         catch(exception& e) {
321                 m->errorOut(e, "HomovaCommand", "runHOMOVA");
322                 exit(1);
323         }
324 }
325
326 //**********************************************************************************************************************
327
328 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
329         try {
330                 double ssWithin = 0.0;
331                 int numSamplesInGroup = sampleIndices.size();
332                 
333                 for(int i=0;i<numSamplesInGroup;i++){
334                         int row = sampleIndices[i];
335                         
336                         for(int j=0;j<numSamplesInGroup;j++){
337                                 int col = sampleIndices[j];
338                                 
339                                 if(col < row){
340                                         ssWithin += distanceMatrix[row][col];
341                                 }
342                                 
343                         }
344                 }
345                 
346                 ssWithin /= numSamplesInGroup;
347                 return ssWithin;
348         }
349         catch(exception& e) {
350                 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
351                 exit(1);
352         }
353 }
354
355 //**********************************************************************************************************************
356
357 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
358         try {
359
360                 map<string, vector<int> >::iterator it;
361                 
362                 double numGroups = (double)groupSampleMap.size();
363                 ssWithinVector.resize(numGroups, 0);
364                 
365                 double totalNumSamples = 0;
366                 double ssWithinFull;
367                 double secondTermSum = 0;
368                 double inverseOneMinusSum = 0;
369                 int index = 0;
370                 
371                 ssWithinVector.resize(numGroups, 0);
372                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
373                         int numSamplesInGroup = it->second.size();
374                         totalNumSamples += numSamplesInGroup;
375                         
376                         ssWithinVector[index] = calcSigleSSWithin(it->second);
377                         ssWithinFull += ssWithinVector[index];
378                         
379                         secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
380                         inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
381                         
382                         ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
383                         index++;
384                 }
385                 
386                 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
387                 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
388                 B /= denomintor;
389                 
390                 return B;
391                 
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "HomovaCommand", "calcBValue");
395                 exit(1);
396         }
397 }
398
399 //**********************************************************************************************************************
400
401 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
402         try{
403                 vector<int> sampleIndices;
404                 vector<int> samplesPerGroup;
405                 
406                 map<string, vector<int> >::iterator it;
407                 for(it=origMapping.begin();it!=origMapping.end();it++){
408                         vector<int> indices = it->second;
409                         samplesPerGroup.push_back(indices.size());
410                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
411                 }
412                 
413                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
414                 
415                 int index = 0;
416                 map<string, vector<int> > randomizedGroups = origMapping;
417                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
418                         for(int i=0;i<it->second.size();i++){
419                                 it->second[i] = sampleIndices[index++];                         
420                         }
421                 }
422                 
423                 return randomizedGroups;                
424         }
425         catch (exception& e) {
426                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
427                 exit(1);
428         }
429 }
430
431 //**********************************************************************************************************************
432
433