5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
12 #include "readphylipvector.h"
14 //**********************************************************************************************************************
// Registers the options this command understands and returns their names.
// Every CommandParameter built below is appended to `parameters`; the names are then
// copied, in registration order, into the vector assembled at the end.
// NOTE(review): the embedded source numbering jumps (15 -> 17, 25 -> 29), so the
// try/return/catch scaffolding is presumably on lines missing from this listing -
// confirm against the full file.
15 vector<string> HomovaCommand::setParameters(){
// File inputs: the design file and the phylip distance matrix.
17 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
18 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
// Numeric options. The literals "1000" and "0.05" match the fallbacks applied when the
// option-parsing constructor finds iters / alpha "not found".
19 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
20 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
// Directory overrides.
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect the registered names in the order they were pushed.
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Reports the caught exception tagged with class and method name
// (the catch clause that binds `e` is not visible in this listing).
29 m->errorOut(e, "HomovaCommand", "setParameters");
33 //**********************************************************************************************************************
34 string HomovaCommand::getHelpString(){
36 string helpString = "";
37 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
38 helpString += "The homova command outputs a .homova file. \n";
39 helpString += "The homova command parameters are phylip, iters, and alpha. The phylip and design parameters are required, unless valid current files exist.\n";
40 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
41 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
42 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n";
43 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
44 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
48 m->errorOut(e, "HomovaCommand", "getHelpString");
53 //**********************************************************************************************************************
// Default constructor: builds a command object that will not run.
// Both `abort` and `calledHelp` are set to true, and an empty "homova" entry is
// registered in outputTypes.
55 HomovaCommand::HomovaCommand(){
57 abort = true; calledHelp = true;
// Empty placeholder list for the "homova" output type.
59 vector<string> tempOutNames;
60 outputTypes["homova"] = tempOutNames;
// Reports the caught exception tagged with class and method name
// (the enclosing try/catch lines are not visible in this listing).
63 m->errorOut(e, "HomovaCommand", "HomovaCommand");
67 //**********************************************************************************************************************
// Option-parsing constructor.
// `option` is either the keyword "help" / "citation" or the raw option string, which is
// handed to OptionParser. On any validation failure `abort` is set to true; execute()
// checks that flag first.
// Members written here: abort, calledHelp, outputTypes, outputDir, phylipFileName,
// designFileName, iters, experimentwiseAlpha.
// NOTE(review): the embedded source numbering skips lines throughout this listing
// (e.g. 76-77, 101-102), so some branch structure and the declaration of `path` are not
// visible here - confirm against the full file.
69 HomovaCommand::HomovaCommand(string option) {
71 abort = false; calledHelp = false;
73 //allow user to run help
74 if(option == "help") { help(); abort = true; calledHelp = true; }
75 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts (see setParameters()).
78 vector<string> myArray = setParameters();
80 OptionParser parser(option);
// NOTE(review): this local name -> value map has the same name as the `parameters`
// container that setParameters() pushes CommandParameter objects into; presumably it
// shadows a member here - confirm.
81 map<string,string> parameters = parser.getParameters();
83 ValidParameters validParameter;
85 //check to make sure all parameters are valid for command
86 map<string,string>::iterator it;
87 for (it = parameters.begin(); it != parameters.end(); it++) {
// A rejected parameter only sets abort; the loop keeps checking the remaining ones.
88 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
91 //initialize outputTypes
92 vector<string> tempOutNames;
93 outputTypes["homova"] = tempOutNames;
95 //if the user changes the output directory command factory will send this info to us in the outputdir parameter
96 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
98 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
99 string inputDir = validParameter.validFile(parameters, "inputdir", false);
100 if (inputDir == "not found"){ inputDir = ""; }
103 it = parameters.find("design");
104 //user has given a design file
105 if(it != parameters.end()){
106 path = m->hasPath(it->second);
107 //if the user has not given a path then, add inputdir. else leave path alone.
108 if (path == "") { parameters["design"] = inputDir + it->second; }
111 it = parameters.find("phylip");
112 //user has given a phylip file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["phylip"] = inputDir + it->second; }
// phylip: "not open" aborts; "not found" falls back to the current phylip file kept by m,
// aborting if there is none; any other value is recorded as the current phylip file.
120 phylipFileName = validParameter.validFile(parameters, "phylip", true);
121 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
122 else if (phylipFileName == "not found") {
123 //if there is a current phylip file, use it
124 phylipFileName = m->getPhylipFile();
125 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
126 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
128 }else { m->setPhylipFile(phylipFileName); }
130 //check for required parameters
// design: same three-way handling as phylip.
// NOTE(review): unlike the phylip branch, "not open" leaves designFileName holding the
// literal "not open" instead of clearing it; harmless while abort is honoured, but
// inconsistent.
131 designFileName = validParameter.validFile(parameters, "design", true);
132 if (designFileName == "not open") { abort = true; }
133 else if (designFileName == "not found") {
134 //if there is a current design file, use it
135 designFileName = m->getDesignFile();
136 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
137 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
138 }else { m->setDesignFile(designFileName); }
// iters: falls back to "1000" when absent, then converted into the member `iters`.
140 string temp = validParameter.validFile(parameters, "iters", false);
141 if (temp == "not found") { temp = "1000"; }
142 m->mothurConvert(temp, iters);
// alpha: falls back to "0.05" when absent, then converted into `experimentwiseAlpha`.
144 temp = validParameter.validFile(parameters, "alpha", false);
145 if (temp == "not found") { temp = "0.05"; }
146 m->mothurConvert(temp, experimentwiseAlpha);
// Any exception raised above is reported with this class/constructor name.
150 catch(exception& e) {
151 m->errorOut(e, "HomovaCommand", "HomovaCommand");
155 //**********************************************************************************************************************
// Runs the HOMOVA analysis end to end:
//   1. reads the design file and the phylip distance matrix,
//   2. squares the lower-triangle distances,
//   3. groups matrix rows by the design-file group of each sample,
//   4. runs the test on all groups together and, when that result is at or below alpha
//      and there are more than two groups, on every pair of groups,
//   5. lists the output file names.
// Visible return values: 0 when help/citation was requested or when a sample is missing
// from the design file, 2 when the command was aborted for another reason.
// NOTE(review): the embedded source numbering skips lines in this listing, so the
// declaration of HOMOVAFile, the initialisation of itB, the else structure around the
// error-rate messages, the final cleanup and the normal return are not visible here -
// confirm against the full file.
157 int HomovaCommand::execute(){
160 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Heap-allocated; released on the early-return path below.
// NOTE(review): no matching delete is visible for the normal path - confirm.
163 designMap = new GroupMap(designFileName);
164 designMap->readDesignMap();
// With no outputdir given, write next to the phylip file.
166 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
168 //read in distance matrix and square it
169 ReadPhylipVector readMatrix(phylipFileName);
170 vector<string> sampleNames = readMatrix.read(distanceMatrix);
// Only entries with column < row are squared, in place.
172 for(int i=0;i<distanceMatrix.size();i++){
173 for(int j=0;j<i;j++){
174 distanceMatrix[i][j] *= distanceMatrix[i][j];
178 //link designMap to rows/columns in distance matrix
// group name -> indices into sampleNames (and so into distanceMatrix) of its samples.
179 map<string, vector<int> > origGroupSampleMap;
180 for(int i=0;i<sampleNames.size();i++){
181 string group = designMap->getGroup(sampleNames[i]);
// A sample missing from the design file is reported and flags control_pressed; the loop
// keeps going, so every missing sample is reported before the command stops below.
183 if (group == "not found") {
184 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
185 }else { origGroupSampleMap[group].push_back(i); }
187 int numGroups = origGroupSampleMap.size();
189 if (m->control_pressed) { delete designMap; return 0; }
191 //create a new filename
// Output name: <outputDir><root of the phylip file name>homova.
193 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "homova";
194 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
195 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
// The same header row goes to the file and to the log.
197 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
198 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
// Test across all groups at once.
200 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
// Pairwise follow-up only when the overall result is at or below alpha and there are
// more than two groups.
202 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
// Number of unordered group pairs; alpha is divided by it (the "Bonferroni" rate
// reported below).
204 int numCombos = numGroups * (numGroups-1) / 2;
205 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
207 map<string, vector<int> >::iterator itA;
208 map<string, vector<int> >::iterator itB;
210 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
// NOTE(review): itB's starting value is not set on any visible line; presumably it is
// positioned just after itA on the skipped lines 211 - confirm.
212 for(;itB!=origGroupSampleMap.end();itB++){
// Two-group map holding just this pair.
213 map<string, vector<int> > pairwiseGroupSampleMap;
214 pairwiseGroupSampleMap[itA->first] = itA->second;
215 pairwiseGroupSampleMap[itB->first] = itB->second;
217 runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
221 m->mothurOutEndLine();
// Error-rate summary. The first two messages use pairwiseAlpha, so they belong to the
// pairwise branch; the repeated experiment-wise message presumably sits in an else
// branch whose lines are not visible.
223 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
224 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
227 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
230 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
// List every file this command wrote.
234 m->mothurOutEndLine();
235 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
236 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
237 m->mothurOutEndLine();
// Any exception raised above is reported with this class/method name.
241 catch(exception& e) {
242 m->errorOut(e, "HomovaCommand", "execute");
247 //**********************************************************************************************************************
// Runs one HOMOVA test on the given groups and writes one result row to HOMOVAFile and
// to the log.
// Parameters:
//   HOMOVAFile     - open output stream the result row is appended to.
//   groupSampleMap - group name -> distance-matrix indices of that group's samples
//                    (taken by value).
//   alpha          - significance level; not used on any line visible in this listing.
// Returns: a double; execute() treats it as the p-value.
// NOTE(review): the embedded source numbering skips lines here, so the declarations of
// `counter` and `pString`, whatever uses `alpha` (lines 283-288), and the return
// statement are not visible - confirm against the full file.
249 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
251 map<string, vector<int> >::iterator it;
252 int numGroups = groupSampleMap.size();
// Observed statistic, plus the per-group values printed at the end of the row.
254 vector<double> ssWithinOrigVector;
255 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
// Permutation loop: reshuffle the samples across the groups `iters` times and count how
// often the reshuffled statistic is strictly greater than the observed one (ties are
// not counted).
258 for(int i=0;i<iters;i++){
259 vector<double> ssWithinRandVector;
260 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
261 double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
262 if(bValueRand > bValueOrig){ counter++; }
// Fraction of permutations that exceeded the observed value.
265 double pValue = (double) counter / (double) iters;
// A p-value below the resolution 1/iters is printed as "<1/iters" rather than as a number.
267 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
268 else { pString = toString(pValue); }
// Row label: the group names joined with '-'.
// NOTE(review): as listed, the loop below starts from begin() again and would repeat
// the first name; presumably the iterator is advanced on the skipped line 275 - confirm.
272 it = groupSampleMap.begin();
273 HOMOVAFile << it->first;
274 m->mothurOut(it->first);
276 for(;it!=groupSampleMap.end();it++){
277 HOMOVAFile << '-' << it->first;
278 m->mothurOut('-' + it->first);
// Statistic and p-value columns.
281 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
282 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
// One extra column per group: the value calcBValue left in ssWithinOrigVector (that
// group's SSwithin divided by its sample count minus one).
289 for(int i=0;i<numGroups;i++){
290 HOMOVAFile << '\t' << ssWithinOrigVector[i];
291 m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
294 m->mothurOutEndLine();
// Any exception raised above is reported with this class/method name.
298 catch(exception& e) {
299 m->errorOut(e, "HomovaCommand", "runHOMOVA");
304 //**********************************************************************************************************************
// Computes the within-group sum of squares for one group.
// sampleIndices holds the distanceMatrix row/column indices of the group's samples
// (taken by value). Entries distanceMatrix[row][col] are accumulated over pairs drawn
// from the group and the total is divided by the group size.
// NOTE(review): the embedded source numbering skips lines 316-317 just before the
// accumulation, so a condition restricting which (row, col) pairs are added may not be
// visible here. execute() squares only entries with column < row, so such a guard would
// be expected - confirm against the full file. The return statement is not visible
// either.
306 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
308 double ssWithin = 0.0;
309 int numSamplesInGroup = sampleIndices.size();
311 for(int i=0;i<numSamplesInGroup;i++){
312 int row = sampleIndices[i];
314 for(int j=0;j<numSamplesInGroup;j++){
315 int col = sampleIndices[j];
318 ssWithin += distanceMatrix[row][col];
// Normalise by the number of samples in the group.
// NOTE(review): an empty sampleIndices would divide by zero here.
324 ssWithin /= numSamplesInGroup;
// Any exception raised above is reported with this class/method name.
327 catch(exception& e) {
328 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
333 //**********************************************************************************************************************
// Computes the B statistic for a grouping of samples.
// Parameters:
//   groupSampleMap - group name -> distance-matrix indices of its samples (by value).
//   ssWithinVector - output; resized to one slot per group and left holding each
//                    group's SSwithin divided by (group size - 1).
// With k groups, N samples in total, n_i samples and SSw_i in group i, the visible lines
// compute
//   B     = (N - k) * log(sum(SSw_i) / (N - k)) - sum((n_i - 1) * log(SSw_i / (n_i - 1)))
//   denom = 1 + 1 / (3 * (k - 1)) * (sum(1 / (n_i - 1)) - 1 / (N - k))
// which matches the form of Bartlett's test statistic and its correction factor.
// NOTE(review): the embedded source numbering skips lines here, so the declarations of
// `index` and `ssWithinFull`, the increment of `index`, how `denomintor` is applied and
// the return statement are not visible - confirm against the full file.
// NOTE(review): a group with a single sample makes (numSamplesInGroup - 1) zero, a
// divisor in three places below; a single group makes (numGroups - 1) zero in the
// correction factor.
335 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
338 map<string, vector<int> >::iterator it;
340 double numGroups = (double)groupSampleMap.size();
341 ssWithinVector.resize(numGroups, 0);
343 double totalNumSamples = 0;
// Running sums for the second term of B and for the correction factor.
345 double secondTermSum = 0;
346 double inverseOneMinusSum = 0;
// Same call as a few lines above; with nothing changing the vector in between on the
// visible lines, this second resize looks redundant.
349 ssWithinVector.resize(numGroups, 0);
350 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
351 int numSamplesInGroup = it->second.size();
352 totalNumSamples += numSamplesInGroup;
// Unscaled SSwithin for this group, also added to the pooled total.
354 ssWithinVector[index] = calcSigleSSWithin(it->second);
355 ssWithinFull += ssWithinVector[index];
357 secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
358 inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
360 ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
// Statistic before correction, then the correction factor.
364 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
365 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
// Any exception raised above is reported with this class/method name.
371 catch(exception& e) {
372 m->errorOut(e, "HomovaCommand", "calcBValue");
377 //**********************************************************************************************************************
379 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
381 vector<int> sampleIndices;
382 vector<int> samplesPerGroup;
384 map<string, vector<int> >::iterator it;
385 for(it=origMapping.begin();it!=origMapping.end();it++){
386 vector<int> indices = it->second;
387 samplesPerGroup.push_back(indices.size());
388 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
391 random_shuffle(sampleIndices.begin(), sampleIndices.end());
394 map<string, vector<int> > randomizedGroups = origMapping;
395 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
396 for(int i=0;i<it->second.size();i++){
397 it->second[i] = sampleIndices[index++];
401 return randomizedGroups;
403 catch (exception& e) {
404 m->errorOut(e, "AmovaCommand", "randomizeGroups");
409 //**********************************************************************************************************************