]> git.donarmstrong.com Git - mothur.git/blob - homovacommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / homovacommand.cpp
1 /*
2  *  homovacommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 2/8/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "homovacommand.h"
11 #include "groupmap.h"
12 #include "readphylipvector.h"
13
14 //**********************************************************************************************************************
15 vector<string> HomovaCommand::setParameters(){  
16         try {
17                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
18                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
19                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20                 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "HomovaCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string HomovaCommand::getHelpString(){  
35         try {
36                 string helpString = "";
37                 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
38                 helpString += "The homova command outputs a .homova file. \n";
39                 helpString += "The homova command parameters are phylip, iters, and alpha.  The phylip and design parameters are required, unless valid current files exist.\n";
40                 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42                 helpString += "The iters parameter allows you to set number of randomization for the P value.  The default is 1000. \n";
43                 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "HomovaCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string HomovaCommand::getOutputFileNameTag(string type, string inputName=""){   
54         try {
55         string outputFileName = "";
56                 map<string, vector<string> >::iterator it;
57         
58         //is this a type this command creates
59         it = outputTypes.find(type);
60         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
61         else {
62             if (type == "homova")            {   outputFileName =  "homova";   }
63             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
64         }
65         return outputFileName;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "HomovaCommand", "getOutputFileNameTag");
69                 exit(1);
70         }
71 }
72 //**********************************************************************************************************************
73 HomovaCommand::HomovaCommand(){ 
74         try {
75                 abort = true; calledHelp = true; 
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["homova"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86
87 HomovaCommand::HomovaCommand(string option) {
88         try {
89                 abort = false; calledHelp = false;   
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         
103                         //check to make sure all parameters are valid for command
104                         map<string,string>::iterator it;
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["homova"] = tempOutNames;
112                         
113                         //if the user changes the output directory command factory will send this info to us in the output parameter 
114                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
115                         
116                         //if the user changes the input directory command factory will send this info to us in the output parameter 
117                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
118                         if (inputDir == "not found"){   inputDir = "";          }
119                         else {
120                                 string path;
121                                 it = parameters.find("design");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
127                                 }
128                                 
129                                 it = parameters.find("phylip");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
135                                 }
136                         }
137                         
138                         phylipFileName = validParameter.validFile(parameters, "phylip", true);
139                         if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
140                         else if (phylipFileName == "not found") { 
141                                 //if there is a current phylip file, use it
142                                 phylipFileName = m->getPhylipFile(); 
143                                 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
144                                 else {  m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
145                                 
146                         }else { m->setPhylipFile(phylipFileName); }     
147                         
148                         //check for required parameters
149                         designFileName = validParameter.validFile(parameters, "design", true);
150                         if (designFileName == "not open") { abort = true; }
151                         else if (designFileName == "not found") {
152                                 //if there is a current design file, use it
153                                 designFileName = m->getDesignFile(); 
154                                 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
155                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }                                                           
156                         }else { m->setDesignFile(designFileName); }     
157                         
158                         string temp = validParameter.validFile(parameters, "iters", false);
159                         if (temp == "not found") { temp = "1000"; }
160                         m->mothurConvert(temp, iters); 
161                         
162                         temp = validParameter.validFile(parameters, "alpha", false);
163                         if (temp == "not found") { temp = "0.05"; }
164                         m->mothurConvert(temp, experimentwiseAlpha); 
165                 }
166                 
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "HomovaCommand", "HomovaCommand");
170                 exit(1);
171         }
172 }
173 //**********************************************************************************************************************
174
175 int HomovaCommand::execute(){
176         try {
177                 
178                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
179                 
180                 //read design file
181                 designMap = new GroupMap(designFileName);
182                 designMap->readDesignMap();
183                 
184                 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
185                 
186                 //read in distance matrix and square it
187                 ReadPhylipVector readMatrix(phylipFileName);
188                 vector<string> sampleNames = readMatrix.read(distanceMatrix);
189                 
190                 for(int i=0;i<distanceMatrix.size();i++){
191                         for(int j=0;j<i;j++){
192                                 distanceMatrix[i][j] *= distanceMatrix[i][j];   
193                         }
194                 }
195                 
196                 //link designMap to rows/columns in distance matrix
197                 map<string, vector<int> > origGroupSampleMap;
198                 for(int i=0;i<sampleNames.size();i++){
199                         string group = designMap->getGroup(sampleNames[i]);
200                         
201                         if (group == "not found") {
202                                 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
203                         }else { origGroupSampleMap[group].push_back(i); }
204                 }
205                 int numGroups = origGroupSampleMap.size();
206                 
207                 if (m->control_pressed) { delete designMap; return 0; }
208                 
209                 //create a new filename
210                 ofstream HOMOVAFile;
211                 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("homova");                          
212                 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
213                 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
214                 
215                 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
216                 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
217                 
218                 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
219
220                 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
221                         
222                         int numCombos = numGroups * (numGroups-1) / 2;
223                         double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
224                         
225                         map<string, vector<int> >::iterator itA;
226                         map<string, vector<int> >::iterator itB;
227                         
228                         for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
229                                 itB = itA;itB++;
230                                 for(;itB!=origGroupSampleMap.end();itB++){
231                                         map<string, vector<int> > pairwiseGroupSampleMap;
232                                         pairwiseGroupSampleMap[itA->first] = itA->second;
233                                         pairwiseGroupSampleMap[itB->first] = itB->second;
234                                         
235                                         runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
236                                 }                       
237                         }
238                         HOMOVAFile << endl;
239                         m->mothurOutEndLine();
240                         
241                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
242                         m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
243                 }
244                 else{
245                         m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
246                 }
247                 
248                 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
249                 
250                 delete designMap;
251                 
252                 m->mothurOutEndLine();
253                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
254                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
255                 m->mothurOutEndLine();
256                 
257                 return 0;
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "HomovaCommand", "execute");
261                 exit(1);
262         }
263 }
264
265 //**********************************************************************************************************************
266
267 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
268         try {
269                 map<string, vector<int> >::iterator it;
270                 int numGroups = groupSampleMap.size();
271                 
272                 vector<double> ssWithinOrigVector;
273                 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
274                 
275                 double counter = 0;
276                 for(int i=0;i<iters;i++){
277                         vector<double> ssWithinRandVector;
278                         map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
279                         double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
280                         if(bValueRand > bValueOrig){    counter++;      }
281                 }
282                 
283                 double pValue = (double) counter / (double) iters;
284                 string pString = "";
285                 if(pValue < 1/(double)iters){   pString = '<' + toString(1/(double)iters);      }
286                 else                                            {       pString = toString(pValue);                                     }
287                 
288                 
289                 //print homova table
290                 it = groupSampleMap.begin();
291                 HOMOVAFile << it->first;
292                 m->mothurOut(it->first);
293                 it++;
294                 for(;it!=groupSampleMap.end();it++){
295                         HOMOVAFile << '-' << it->first;
296                         m->mothurOut('-' + it->first);
297                 }
298
299                 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
300                 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
301                 
302                 if(pValue < alpha){
303                         HOMOVAFile << "*";
304                         m->mothurOut("*");
305                 }
306
307                 for(int i=0;i<numGroups;i++){
308                         HOMOVAFile << '\t' << ssWithinOrigVector[i];
309                         m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
310                 }
311                 HOMOVAFile << endl;
312                 m->mothurOutEndLine();
313                 
314                 return pValue;  
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "HomovaCommand", "runHOMOVA");
318                 exit(1);
319         }
320 }
321
322 //**********************************************************************************************************************
323
324 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
325         try {
326                 double ssWithin = 0.0;
327                 int numSamplesInGroup = sampleIndices.size();
328                 
329                 for(int i=0;i<numSamplesInGroup;i++){
330                         int row = sampleIndices[i];
331                         
332                         for(int j=0;j<numSamplesInGroup;j++){
333                                 int col = sampleIndices[j];
334                                 
335                                 if(col < row){
336                                         ssWithin += distanceMatrix[row][col];
337                                 }
338                                 
339                         }
340                 }
341                 
342                 ssWithin /= numSamplesInGroup;
343                 return ssWithin;
344         }
345         catch(exception& e) {
346                 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
347                 exit(1);
348         }
349 }
350
351 //**********************************************************************************************************************
352
353 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
354         try {
355
356                 map<string, vector<int> >::iterator it;
357                 
358                 double numGroups = (double)groupSampleMap.size();
359                 ssWithinVector.resize(numGroups, 0);
360                 
361                 double totalNumSamples = 0;
362                 double ssWithinFull;
363                 double secondTermSum = 0;
364                 double inverseOneMinusSum = 0;
365                 int index = 0;
366                 
367                 ssWithinVector.resize(numGroups, 0);
368                 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
369                         int numSamplesInGroup = it->second.size();
370                         totalNumSamples += numSamplesInGroup;
371                         
372                         ssWithinVector[index] = calcSigleSSWithin(it->second);
373                         ssWithinFull += ssWithinVector[index];
374                         
375                         secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
376                         inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
377                         
378                         ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
379                         index++;
380                 }
381                 
382                 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
383                 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
384                 B /= denomintor;
385                 
386                 return B;
387                 
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "HomovaCommand", "calcBValue");
391                 exit(1);
392         }
393 }
394
395 //**********************************************************************************************************************
396
397 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
398         try{
399                 vector<int> sampleIndices;
400                 vector<int> samplesPerGroup;
401                 
402                 map<string, vector<int> >::iterator it;
403                 for(it=origMapping.begin();it!=origMapping.end();it++){
404                         vector<int> indices = it->second;
405                         samplesPerGroup.push_back(indices.size());
406                         sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
407                 }
408                 
409                 random_shuffle(sampleIndices.begin(), sampleIndices.end());
410                 
411                 int index = 0;
412                 map<string, vector<int> > randomizedGroups = origMapping;
413                 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
414                         for(int i=0;i<it->second.size();i++){
415                                 it->second[i] = sampleIndices[index++];                         
416                         }
417                 }
418                 
419                 return randomizedGroups;                
420         }
421         catch (exception& e) {
422                 m->errorOut(e, "AmovaCommand", "randomizeGroups");
423                 exit(1);
424         }
425 }
426
427 //**********************************************************************************************************************
428
429