]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
changes while testing
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "groupmap.h"
12 #include "readphylipvector.h"
13 #include "sharedutilities.h"
14
15 //**********************************************************************************************************************
16 vector<string> HomovaCommand::setParameters(){  
17         try {
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pdesign);
19                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pphylip);
20         CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
22                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "HomovaCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string HomovaCommand::getHelpString(){  
37         try {
38                 string helpString = "";
39                 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
40                 helpString += "The homova command outputs a .homova file. \n";
41                 helpString += "The homova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
42                 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
43                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
44         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
45                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
46                 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "HomovaCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string HomovaCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "homova") {  pattern = "[filename],homova"; } 
61         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
62         
63         return pattern;
64     }
65     catch(exception& e) {
66         m->errorOut(e, "HomovaCommand", "getOutputPattern");
67         exit(1);
68     }
69 }
70 //**********************************************************************************************************************
71 HomovaCommand::HomovaCommand(){ 
72         try {
73                 abort = true; calledHelp = true; 
74                 setParameters();
75                 vector<string> tempOutNames;
76                 outputTypes["homova"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84
85 HomovaCommand::HomovaCommand(string option) {
86         try {
87                 abort = false; calledHelp = false;   
88                 
89                 //allow user to run help
90                 if(option == "help") { help(); abort = true; calledHelp = true; }
91                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92                 
93                 else {
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         
101                         //check to make sure all parameters are valid for command
102                         map<string,string>::iterator it;
103                         for (it = parameters.begin(); it != parameters.end(); it++) { 
104                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
105                         }
106                         
107                         //initialize outputTypes
108                         vector<string> tempOutNames;
109                         outputTypes["homova"] = tempOutNames;
110                         
111                         //if the user changes the output directory command factory will send this info to us in the output parameter 
112                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("design");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
125                                 }
126                                 
127                                 it = parameters.find("phylip");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
133                                 }
134                         }
135                         
136                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
137                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
138                         else if (phylipFileName == "not found") { 
139                                 //if there is a current phylip file, use it
140                                 phylipFileName = m->getPhylipFile(); 
141                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
142                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
143                                 
144                         }else { m->setPhylipFile(phylipFileName); }     
145                         
146                         //check for required parameters
147                         designFileName = validParameter.validFile(parameters, "design", true);
148                         if (designFileName == "not open") { abort = true; }
149                         else if (designFileName == "not found") {
150                                 //if there is a current design file, use it
151                                 designFileName = m->getDesignFile(); 
152                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
153                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
154                         }else { m->setDesignFile(designFileName); }     
155                         
156                         string temp = validParameter.validFile(parameters, "iters", false);
157                         if (temp == "not found") { temp = "1000"; }
158                         m->mothurConvert(temp, iters); 
159                         
160                         temp = validParameter.validFile(parameters, "alpha", false);
161                         if (temp == "not found") { temp = "0.05"; }
162                         m->mothurConvert(temp, experimentwiseAlpha); 
163             
164             string sets = validParameter.validFile(parameters, "sets", false);                  
165                         if (sets == "not found") { sets = ""; }
166                         else { 
167                                 m->splitAtDash(sets, Sets);
168                         }
169                 }
170                 
171         }
172         catch(exception& e) {
173                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
174                 exit(1);
175         }
176 }
177 //**********************************************************************************************************************
178
179 int HomovaCommand::execute(){
180         try {
181                 
182                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
183                 
184                 //read design file
185                 designMap = new GroupMap(designFileName);
186                 designMap->readDesignMap();
187                 
188                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
189                 
190                 //read in distance matrix and square it
191                 ReadPhylipVector readMatrix(phylipFileName);
192                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
193                 
194         if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
195             SharedUtil util; 
196             vector<string> dGroups = designMap->getNamesOfGroups();
197             util.setGroups(Sets, dGroups);  
198             
199             for(int i=0;i<distanceMatrix.size();i++){
200                 
201                 if (m->control_pressed) { delete designMap; return 0; }
202                 
203                 string group = designMap->getGroup(sampleNames[i]);
204                 
205                 if (group == "not found") {
206                     m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
207                 }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
208                     //remove from all other rows
209                     for(int j=0;j<distanceMatrix.size();j++){
210                         distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
211                     }
212                     distanceMatrix.erase(distanceMatrix.begin()+i);
213                     sampleNames.erase(sampleNames.begin()+i);
214                     i--;
215                 }
216             }
217         }
218
219                 for(int i=0;i<distanceMatrix.size();i++){
220                         for(int j=0;j<i;j++){
221                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
222                         }
223                 }
224                 
225                 //link designMap to rows/columns in distance matrix
226                 map<string, vector<int> > origGroupSampleMap;
227                 for(int i=0;i<sampleNames.size();i++){
228                         string group = designMap->getGroup(sampleNames[i]);
229                         
230                         if (group == "not found") {
231                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
232                         }else { origGroupSampleMap[group].push_back(i); }
233                 }
234                 int numGroups = origGroupSampleMap.size();
235                 
236                 if (m->control_pressed) { delete designMap; return 0; }
237                 
238                 //create a new filename
239                 ofstream HOMOVAFile;
240         map<string, string> variables; 
241                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
242                 string HOMOVAFileName = getOutputFileName("homova", variables);                         
243                 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
244                 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
245                 
246                 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
247                 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
248                 
249                 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
250
251                 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
252                         
253                         int numCombos = numGroups * (numGroups-1) / 2;
254                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
255                         
256                         map<string, vector<int> >::iterator itA;
257                         map<string, vector<int> >::iterator itB;
258                         
259                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
260                                 itB = itA;itB++;
261                                 for(;itB!=origGroupSampleMap.end();itB++){
262                                         map<string, vector<int> > pairwiseGroupSampleMap;
263                                         pairwiseGroupSampleMap[itA->first] = itA->second;
264                                         pairwiseGroupSampleMap[itB->first] = itB->second;
265                                         
266                                         runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
267                                 }                       
268                         }
269                         HOMOVAFile << endl;
270                         m->mothurOutEndLine();
271                         
272                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
273                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
274                 }
275                 else{
276                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
277                 }
278                 
279                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
280                 
281                 delete designMap;
282                 
283                 m->mothurOutEndLine();
284                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
285                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
286                 m->mothurOutEndLine();
287                 
288                 return 0;
289         }
290         catch(exception& e) {
291                 m->errorOut(e, "HomovaCommand", "execute");
292                 exit(1);
293         }
294 }
295
296 //**********************************************************************************************************************
297
298 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
299         try {
300                 map<string, vector<int> >::iterator it;
301                 int numGroups = groupSampleMap.size();
302                 
303                 vector<double> ssWithinOrigVector;
304                 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
305                 
306                 double counter = 0;
307                 for(int i=0;i<iters;i++){
308                         vector<double> ssWithinRandVector;
309                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
310                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
311                         if(bValueRand >= bValueOrig){   counter++;      }
312                 }
313                 
314                 double pValue = (double) counter / (double) iters;
315                 string pString = "";
316                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
317                 else                                            {       pString = toString(pValue);                                     }
318                 
319                 
320                 //print homova table
321                 it = groupSampleMap.begin();
322                 HOMOVAFile << it->first;
323                 m->mothurOut(it->first);
324                 it++;
325                 for(;it!=groupSampleMap.end();it++){
326                         HOMOVAFile << '-' << it->first;
327                         m->mothurOut('-' + it->first);
328                 }
329
330                 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
331                 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
332                 
333                 if(pValue < alpha){
334                         HOMOVAFile << "*";
335                         m->mothurOut("*");
336                 }
337
338                 for(int i=0;i<numGroups;i++){
339                         HOMOVAFile << '\t' << ssWithinOrigVector[i];
340                         m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
341                 }
342                 HOMOVAFile << endl;
343                 m->mothurOutEndLine();
344                 
345                 return pValue;  
346         }
347         catch(exception& e) {
348                 m->errorOut(e, "HomovaCommand", "runHOMOVA");
349                 exit(1);
350         }
351 }
352
353 //**********************************************************************************************************************
354
355 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
356         try {
357                 double ssWithin = 0.0;
358                 int numSamplesInGroup = sampleIndices.size();
359                 
360                 for(int i=0;i<numSamplesInGroup;i++){
361                         int row = sampleIndices[i];
362                         
363                         for(int j=0;j<numSamplesInGroup;j++){
364                                 int col = sampleIndices[j];
365                                 
366                                 if(col < row){
367                                         ssWithin += distanceMatrix[row][col];
368                                 }
369                                 
370                         }
371                 }
372                 
373                 ssWithin /= numSamplesInGroup;
374                 return ssWithin;
375         }
376         catch(exception& e) {
377                 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
378                 exit(1);
379         }
380 }
381
382 //**********************************************************************************************************************
383
384 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
385         try {
386
387                 map<string, vector<int> >::iterator it;
388                 
389                 double numGroups = (double)groupSampleMap.size();
390                 ssWithinVector.resize(numGroups, 0);
391                 
392                 double totalNumSamples = 0;
393                 double ssWithinFull;
394                 double secondTermSum = 0;
395                 double inverseOneMinusSum = 0;
396                 int index = 0;
397                 
398                 ssWithinVector.resize(numGroups, 0);
399                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
400                         int numSamplesInGroup = it->second.size();
401                         totalNumSamples += numSamplesInGroup;
402                         
403                         ssWithinVector[index] = calcSigleSSWithin(it->second);
404                         ssWithinFull += ssWithinVector[index];
405                         
406                         secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
407                         inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
408                         
409                         ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
410                         index++;
411                 }
412                 
413                 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
414                 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
415                 B /= denomintor;
416                 
417                 return B;
418                 
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "HomovaCommand", "calcBValue");
422                 exit(1);
423         }
424 }
425
426 //**********************************************************************************************************************
427
428 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
429         try{
430                 vector<int> sampleIndices;
431                 vector<int> samplesPerGroup;
432                 
433                 map<string, vector<int> >::iterator it;
434                 for(it=origMapping.begin();it!=origMapping.end();it++){
435                         vector<int> indices = it->second;
436                         samplesPerGroup.push_back(indices.size());
437                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
438                 }
439                 
440                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
441                 
442                 int index = 0;
443                 map<string, vector<int> > randomizedGroups = origMapping;
444                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
445                         for(int i=0;i<it->second.size();i++){
446                                 it->second[i] = sampleIndices[index++];                         
447                         }
448                 }
449                 
450                 return randomizedGroups;                
451         }
452         catch (exception& e) {
453                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
454                 exit(1);
455         }
456 }
457
458 //**********************************************************************************************************************
459
460