]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
working on current change
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "groupmap.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15 vector<string> HomovaCommand::setParameters(){  
16         try {
17                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
18                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "HomovaCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string HomovaCommand::getHelpString(){  
35         try {
36                 string helpString = "";
37                 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
38                 helpString += "The homova command outputs a .homova file. \n";
39                 helpString += "The homova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
40                 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
43                 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "HomovaCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52
53 //**********************************************************************************************************************
54
55 HomovaCommand::HomovaCommand(){ 
56         try {
57                 abort = true; calledHelp = true; 
58                 setParameters();
59                 vector<string> tempOutNames;
60                 outputTypes["homova"] = tempOutNames;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68
69 HomovaCommand::HomovaCommand(string option) {
70         try {
71                 abort = false; calledHelp = false;   
72                 
73                 //allow user to run help
74                 if(option == "help") { help(); abort = true; calledHelp = true; }
75                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
76                 
77                 else {
78                         vector<string> myArray = setParameters();
79                         
80                         OptionParser parser(option);
81                         map<string,string> parameters = parser.getParameters();
82                         
83                         ValidParameters validParameter;
84                         
85                         //check to make sure all parameters are valid for command
86                         map<string,string>::iterator it;
87                         for (it = parameters.begin(); it != parameters.end(); it++) { 
88                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
89                         }
90                         
91                         //initialize outputTypes
92                         vector<string> tempOutNames;
93                         outputTypes["homova"] = tempOutNames;
94                         
95                         //if the user changes the output directory command factory will send this info to us in the output parameter 
96                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
97                         
98                         //if the user changes the input directory command factory will send this info to us in the output parameter 
99                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
100                         if (inputDir == "not found"){   inputDir = "";          }
101                         else {
102                                 string path;
103                                 it = parameters.find("design");
104                                 //user has given a template file
105                                 if(it != parameters.end()){ 
106                                         path = m->hasPath(it->second);
107                                         //if the user has not given a path then, add inputdir. else leave path alone.
108                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
109                                 }
110                                 
111                                 it = parameters.find("phylip");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
117                                 }
118                         }
119                         
120                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
121                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
122                         else if (phylipFileName == "not found") { 
123                                 //if there is a current phylip file, use it
124                                 phylipFileName = m->getPhylipFile(); 
125                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
126                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
127                                 
128                         }else { m->setPhylipFile(phylipFileName); }     
129                         
130                         //check for required parameters
131                         designFileName = validParameter.validFile(parameters, "design", true);
132                         if (designFileName == "not open") { abort = true; }
133                         else if (designFileName == "not found") {
134                                 //if there is a current design file, use it
135                                 designFileName = m->getDesignFile(); 
136                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
137                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
138                         }else { m->setDesignFile(designFileName); }     
139                         
140                         string temp = validParameter.validFile(parameters, "iters", false);
141                         if (temp == "not found") { temp = "1000"; }
142                         convert(temp, iters); 
143                         
144                         temp = validParameter.validFile(parameters, "alpha", false);
145                         if (temp == "not found") { temp = "0.05"; }
146                         convert(temp, experimentwiseAlpha); 
147                 }
148                 
149         }
150         catch(exception& e) {
151                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
152                 exit(1);
153         }
154 }
155 //**********************************************************************************************************************
156
157 int HomovaCommand::execute(){
158         try {
159                 
160                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
161                 
162                 //read design file
163                 designMap = new GroupMap(designFileName);
164                 designMap->readDesignMap();
165                 
166                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
167                 
168                 //read in distance matrix and square it
169                 ReadPhylipVector readMatrix(phylipFileName);
170                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
171                 
172                 for(int i=0;i<distanceMatrix.size();i++){
173                         for(int j=0;j<i;j++){
174                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
175                         }
176                 }
177                 
178                 //link designMap to rows/columns in distance matrix
179                 map<string, vector<int> > origGroupSampleMap;
180                 for(int i=0;i<sampleNames.size();i++){
181                         origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
182                 }
183                 int numGroups = origGroupSampleMap.size();
184                 
185                 //create a new filename
186                 ofstream HOMOVAFile;
187                 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName))  + "homova";                               
188                 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
189                 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
190                 
191                 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
192                 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
193                 
194                 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
195
196                 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
197                         
198                         int numCombos = numGroups * (numGroups-1) / 2;
199                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
200                         
201                         map<string, vector<int> >::iterator itA;
202                         map<string, vector<int> >::iterator itB;
203                         
204                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
205                                 itB = itA;itB++;
206                                 for(;itB!=origGroupSampleMap.end();itB++){
207                                         map<string, vector<int> > pairwiseGroupSampleMap;
208                                         pairwiseGroupSampleMap[itA->first] = itA->second;
209                                         pairwiseGroupSampleMap[itB->first] = itB->second;
210                                         
211                                         runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
212                                 }                       
213                         }
214                         HOMOVAFile << endl;
215                         m->mothurOutEndLine();
216                         
217                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
218                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
219                 }
220                 else{
221                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
222                 }
223                 
224                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
225                 
226                 delete designMap;
227                 
228                 m->mothurOutEndLine();
229                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
230                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
231                 m->mothurOutEndLine();
232                 
233                 return 0;
234         }
235         catch(exception& e) {
236                 m->errorOut(e, "HomovaCommand", "execute");
237                 exit(1);
238         }
239 }
240
241 //**********************************************************************************************************************
242
243 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
244         try {
245                 map<string, vector<int> >::iterator it;
246                 int numGroups = groupSampleMap.size();
247                 
248                 vector<double> ssWithinOrigVector;
249                 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
250                 
251                 double counter = 0;
252                 for(int i=0;i<iters;i++){
253                         vector<double> ssWithinRandVector;
254                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
255                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
256                         if(bValueRand > bValueOrig){    counter++;      }
257                 }
258                 
259                 double pValue = (double) counter / (double) iters;
260                 string pString = "";
261                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
262                 else                                            {       pString = toString(pValue);                                     }
263                 
264                 
265                 //print homova table
266                 it = groupSampleMap.begin();
267                 HOMOVAFile << it->first;
268                 m->mothurOut(it->first);
269                 it++;
270                 for(;it!=groupSampleMap.end();it++){
271                         HOMOVAFile << '-' << it->first;
272                         m->mothurOut('-' + it->first);
273                 }
274
275                 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
276                 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
277                 
278                 if(pValue < alpha){
279                         HOMOVAFile << "*";
280                         m->mothurOut("*");
281                 }
282
283                 for(int i=0;i<numGroups;i++){
284                         HOMOVAFile << '\t' << ssWithinOrigVector[i];
285                         m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
286                 }
287                 HOMOVAFile << endl;
288                 m->mothurOutEndLine();
289                 
290                 return pValue;  
291         }
292         catch(exception& e) {
293                 m->errorOut(e, "HomovaCommand", "runHOMOVA");
294                 exit(1);
295         }
296 }
297
298 //**********************************************************************************************************************
299
300 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
301         try {
302                 double ssWithin = 0.0;
303                 int numSamplesInGroup = sampleIndices.size();
304                 
305                 for(int i=0;i<numSamplesInGroup;i++){
306                         int row = sampleIndices[i];
307                         
308                         for(int j=0;j<numSamplesInGroup;j++){
309                                 int col = sampleIndices[j];
310                                 
311                                 if(col < row){
312                                         ssWithin += distanceMatrix[row][col];
313                                 }
314                                 
315                         }
316                 }
317                 
318                 ssWithin /= numSamplesInGroup;
319                 return ssWithin;
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
323                 exit(1);
324         }
325 }
326
327 //**********************************************************************************************************************
328
329 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
330         try {
331
332                 map<string, vector<int> >::iterator it;
333                 
334                 double numGroups = (double)groupSampleMap.size();
335                 ssWithinVector.resize(numGroups, 0);
336                 
337                 double totalNumSamples = 0;
338                 double ssWithinFull;
339                 double secondTermSum = 0;
340                 double inverseOneMinusSum = 0;
341                 int index = 0;
342                 
343                 ssWithinVector.resize(numGroups, 0);
344                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
345                         int numSamplesInGroup = it->second.size();
346                         totalNumSamples += numSamplesInGroup;
347                         
348                         ssWithinVector[index] = calcSigleSSWithin(it->second);
349                         ssWithinFull += ssWithinVector[index];
350                         
351                         secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
352                         inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
353                         
354                         ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
355                         index++;
356                 }
357                 
358                 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
359                 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
360                 B /= denomintor;
361                 
362                 return B;
363                 
364         }
365         catch(exception& e) {
366                 m->errorOut(e, "HomovaCommand", "calcBValue");
367                 exit(1);
368         }
369 }
370
371 //**********************************************************************************************************************
372
373 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
374         try{
375                 vector<int> sampleIndices;
376                 vector<int> samplesPerGroup;
377                 
378                 map<string, vector<int> >::iterator it;
379                 for(it=origMapping.begin();it!=origMapping.end();it++){
380                         vector<int> indices = it->second;
381                         samplesPerGroup.push_back(indices.size());
382                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
383                 }
384                 
385                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
386                 
387                 int index = 0;
388                 map<string, vector<int> > randomizedGroups = origMapping;
389                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
390                         for(int i=0;i<it->second.size();i++){
391                                 it->second[i] = sampleIndices[index++];                         
392                         }
393                 }
394                 
395                 return randomizedGroups;                
396         }
397         catch (exception& e) {
398                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
399                 exit(1);
400         }
401 }
402
403 //**********************************************************************************************************************
404
405