]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "groupmap.h"
12 #include "readphylipvector.h"
13 #include "sharedutilities.h"
14
15 //**********************************************************************************************************************
16 vector<string> HomovaCommand::setParameters(){  
17         try {
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
20         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
21                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "HomovaCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string HomovaCommand::getHelpString(){  
37         try {
38                 string helpString = "";
39                 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
40                 helpString += "The homova command outputs a .homova file. \n";
41                 helpString += "The homova command parameters are phylip, iters, sets and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
42                 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
43                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
44         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
45                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
46                 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "HomovaCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string HomovaCommand::getOutputFileNameTag(string type, string inputName=""){   
57         try {
58         string outputFileName = "";
59                 map<string, vector<string> >::iterator it;
60         
61         //is this a type this command creates
62         it = outputTypes.find(type);
63         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
64         else {
65             if (type == "homova")            {   outputFileName =  "homova";   }
66             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
67         }
68         return outputFileName;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "HomovaCommand", "getOutputFileNameTag");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 HomovaCommand::HomovaCommand(){ 
77         try {
78                 abort = true; calledHelp = true; 
79                 setParameters();
80                 vector<string> tempOutNames;
81                 outputTypes["homova"] = tempOutNames;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89
90 HomovaCommand::HomovaCommand(string option) {
91         try {
92                 abort = false; calledHelp = false;   
93                 
94                 //allow user to run help
95                 if(option == "help") { help(); abort = true; calledHelp = true; }
96                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97                 
98                 else {
99                         vector<string> myArray = setParameters();
100                         
101                         OptionParser parser(option);
102                         map<string,string> parameters = parser.getParameters();
103                         
104                         ValidParameters validParameter;
105                         
106                         //check to make sure all parameters are valid for command
107                         map<string,string>::iterator it;
108                         for (it = parameters.begin(); it != parameters.end(); it++) { 
109                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
110                         }
111                         
112                         //initialize outputTypes
113                         vector<string> tempOutNames;
114                         outputTypes["homova"] = tempOutNames;
115                         
116                         //if the user changes the output directory command factory will send this info to us in the output parameter 
117                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
118                         
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("design");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("phylip");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
138                                 }
139                         }
140                         
141                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
142                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
143                         else if (phylipFileName == "not found") { 
144                                 //if there is a current phylip file, use it
145                                 phylipFileName = m->getPhylipFile(); 
146                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
147                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
148                                 
149                         }else { m->setPhylipFile(phylipFileName); }     
150                         
151                         //check for required parameters
152                         designFileName = validParameter.validFile(parameters, "design", true);
153                         if (designFileName == "not open") { abort = true; }
154                         else if (designFileName == "not found") {
155                                 //if there is a current design file, use it
156                                 designFileName = m->getDesignFile(); 
157                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
158                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
159                         }else { m->setDesignFile(designFileName); }     
160                         
161                         string temp = validParameter.validFile(parameters, "iters", false);
162                         if (temp == "not found") { temp = "1000"; }
163                         m->mothurConvert(temp, iters); 
164                         
165                         temp = validParameter.validFile(parameters, "alpha", false);
166                         if (temp == "not found") { temp = "0.05"; }
167                         m->mothurConvert(temp, experimentwiseAlpha); 
168             
169             string sets = validParameter.validFile(parameters, "sets", false);                  
170                         if (sets == "not found") { sets = ""; }
171                         else { 
172                                 m->splitAtDash(sets, Sets);
173                         }
174                 }
175                 
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
179                 exit(1);
180         }
181 }
182 //**********************************************************************************************************************
183
184 int HomovaCommand::execute(){
185         try {
186                 
187                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
188                 
189                 //read design file
190                 designMap = new GroupMap(designFileName);
191                 designMap->readDesignMap();
192                 
193                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
194                 
195                 //read in distance matrix and square it
196                 ReadPhylipVector readMatrix(phylipFileName);
197                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
198                 
199         if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
200             SharedUtil util; 
201             vector<string> dGroups = designMap->getNamesOfGroups();
202             util.setGroups(Sets, dGroups);  
203             
204             for(int i=0;i<distanceMatrix.size();i++){
205                 
206                 if (m->control_pressed) { delete designMap; return 0; }
207                 
208                 string group = designMap->getGroup(sampleNames[i]);
209                 
210                 if (group == "not found") {
211                     m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
212                 }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
213                     //remove from all other rows
214                     for(int j=0;j<distanceMatrix.size();j++){
215                         distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
216                     }
217                     distanceMatrix.erase(distanceMatrix.begin()+i);
218                     sampleNames.erase(sampleNames.begin()+i);
219                     i--;
220                 }
221             }
222         }
223
224                 for(int i=0;i<distanceMatrix.size();i++){
225                         for(int j=0;j<i;j++){
226                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
227                         }
228                 }
229                 
230                 //link designMap to rows/columns in distance matrix
231                 map<string, vector<int> > origGroupSampleMap;
232                 for(int i=0;i<sampleNames.size();i++){
233                         string group = designMap->getGroup(sampleNames[i]);
234                         
235                         if (group == "not found") {
236                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
237                         }else { origGroupSampleMap[group].push_back(i); }
238                 }
239                 int numGroups = origGroupSampleMap.size();
240                 
241                 if (m->control_pressed) { delete designMap; return 0; }
242                 
243                 //create a new filename
244                 ofstream HOMOVAFile;
245                 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("homova");                          
246                 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
247                 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
248                 
249                 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
250                 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
251                 
252                 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
253
254                 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
255                         
256                         int numCombos = numGroups * (numGroups-1) / 2;
257                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
258                         
259                         map<string, vector<int> >::iterator itA;
260                         map<string, vector<int> >::iterator itB;
261                         
262                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
263                                 itB = itA;itB++;
264                                 for(;itB!=origGroupSampleMap.end();itB++){
265                                         map<string, vector<int> > pairwiseGroupSampleMap;
266                                         pairwiseGroupSampleMap[itA->first] = itA->second;
267                                         pairwiseGroupSampleMap[itB->first] = itB->second;
268                                         
269                                         runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
270                                 }                       
271                         }
272                         HOMOVAFile << endl;
273                         m->mothurOutEndLine();
274                         
275                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
276                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
277                 }
278                 else{
279                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
280                 }
281                 
282                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
283                 
284                 delete designMap;
285                 
286                 m->mothurOutEndLine();
287                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
289                 m->mothurOutEndLine();
290                 
291                 return 0;
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "HomovaCommand", "execute");
295                 exit(1);
296         }
297 }
298
299 //**********************************************************************************************************************
300
301 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
302         try {
303                 map<string, vector<int> >::iterator it;
304                 int numGroups = groupSampleMap.size();
305                 
306                 vector<double> ssWithinOrigVector;
307                 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
308                 
309                 double counter = 0;
310                 for(int i=0;i<iters;i++){
311                         vector<double> ssWithinRandVector;
312                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
313                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
314                         if(bValueRand > bValueOrig){    counter++;      }
315                 }
316                 
317                 double pValue = (double) counter / (double) iters;
318                 string pString = "";
319                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
320                 else                                            {       pString = toString(pValue);                                     }
321                 
322                 
323                 //print homova table
324                 it = groupSampleMap.begin();
325                 HOMOVAFile << it->first;
326                 m->mothurOut(it->first);
327                 it++;
328                 for(;it!=groupSampleMap.end();it++){
329                         HOMOVAFile << '-' << it->first;
330                         m->mothurOut('-' + it->first);
331                 }
332
333                 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
334                 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
335                 
336                 if(pValue < alpha){
337                         HOMOVAFile << "*";
338                         m->mothurOut("*");
339                 }
340
341                 for(int i=0;i<numGroups;i++){
342                         HOMOVAFile << '\t' << ssWithinOrigVector[i];
343                         m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
344                 }
345                 HOMOVAFile << endl;
346                 m->mothurOutEndLine();
347                 
348                 return pValue;  
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "HomovaCommand", "runHOMOVA");
352                 exit(1);
353         }
354 }
355
356 //**********************************************************************************************************************
357
358 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
359         try {
360                 double ssWithin = 0.0;
361                 int numSamplesInGroup = sampleIndices.size();
362                 
363                 for(int i=0;i<numSamplesInGroup;i++){
364                         int row = sampleIndices[i];
365                         
366                         for(int j=0;j<numSamplesInGroup;j++){
367                                 int col = sampleIndices[j];
368                                 
369                                 if(col < row){
370                                         ssWithin += distanceMatrix[row][col];
371                                 }
372                                 
373                         }
374                 }
375                 
376                 ssWithin /= numSamplesInGroup;
377                 return ssWithin;
378         }
379         catch(exception& e) {
380                 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
381                 exit(1);
382         }
383 }
384
385 //**********************************************************************************************************************
386
387 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
388         try {
389
390                 map<string, vector<int> >::iterator it;
391                 
392                 double numGroups = (double)groupSampleMap.size();
393                 ssWithinVector.resize(numGroups, 0);
394                 
395                 double totalNumSamples = 0;
396                 double ssWithinFull;
397                 double secondTermSum = 0;
398                 double inverseOneMinusSum = 0;
399                 int index = 0;
400                 
401                 ssWithinVector.resize(numGroups, 0);
402                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
403                         int numSamplesInGroup = it->second.size();
404                         totalNumSamples += numSamplesInGroup;
405                         
406                         ssWithinVector[index] = calcSigleSSWithin(it->second);
407                         ssWithinFull += ssWithinVector[index];
408                         
409                         secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
410                         inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
411                         
412                         ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
413                         index++;
414                 }
415                 
416                 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
417                 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
418                 B /= denomintor;
419                 
420                 return B;
421                 
422         }
423         catch(exception& e) {
424                 m->errorOut(e, "HomovaCommand", "calcBValue");
425                 exit(1);
426         }
427 }
428
429 //**********************************************************************************************************************
430
431 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
432         try{
433                 vector<int> sampleIndices;
434                 vector<int> samplesPerGroup;
435                 
436                 map<string, vector<int> >::iterator it;
437                 for(it=origMapping.begin();it!=origMapping.end();it++){
438                         vector<int> indices = it->second;
439                         samplesPerGroup.push_back(indices.size());
440                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
441                 }
442                 
443                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
444                 
445                 int index = 0;
446                 map<string, vector<int> > randomizedGroups = origMapping;
447                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
448                         for(int i=0;i<it->second.size();i++){
449                                 it->second[i] = sampleIndices[index++];                         
450                         }
451                 }
452                 
453                 return randomizedGroups;                
454         }
455         catch (exception& e) {
456                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
457                 exit(1);
458         }
459 }
460
461 //**********************************************************************************************************************
462
463