5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
12 #include "readphylipvector.h"
14 //**********************************************************************************************************************
16 vector<string> HomovaCommand::getValidParameters(){
18 string Array[] = {"outputdir","iters","phylip","design","alpha", "inputdir"};
19 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
23 m->errorOut(e, "HomovaCommand", "getValidParameters");
28 //**********************************************************************************************************************
30 HomovaCommand::HomovaCommand(){
32 abort = true; calledHelp = true;
33 vector<string> tempOutNames;
34 outputTypes["homova"] = tempOutNames;
37 m->errorOut(e, "HomovaCommand", "HomovaCommand");
42 //**********************************************************************************************************************
44 vector<string> HomovaCommand::getRequiredParameters(){
46 string Array[] = {"design"};
47 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
51 m->errorOut(e, "HomovaCommand", "getRequiredParameters");
56 //**********************************************************************************************************************
58 vector<string> HomovaCommand::getRequiredFiles(){
61 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
65 m->errorOut(e, "HomovaCommand", "getRequiredFiles");
70 //**********************************************************************************************************************
72 HomovaCommand::HomovaCommand(string option) {
74 abort = false; calledHelp = false;
76 //allow user to run help
77 if(option == "help") { help(); abort = true; calledHelp = true; }
80 //valid paramters for this command
81 string AlignArray[] = {"design","outputdir","iters","phylip","alpha", "inputdir"};
82 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
84 OptionParser parser(option);
85 map<string,string> parameters = parser.getParameters();
87 ValidParameters validParameter;
89 //check to make sure all parameters are valid for command
90 map<string,string>::iterator it;
91 for (it = parameters.begin(); it != parameters.end(); it++) {
92 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
95 //initialize outputTypes
96 vector<string> tempOutNames;
97 outputTypes["homova"] = tempOutNames;
99 //if the user changes the output directory command factory will send this info to us in the output parameter
100 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
102 //if the user changes the input directory command factory will send this info to us in the output parameter
103 string inputDir = validParameter.validFile(parameters, "inputdir", false);
104 if (inputDir == "not found"){ inputDir = ""; }
107 it = parameters.find("design");
108 //user has given a template file
109 if(it != parameters.end()){
110 path = m->hasPath(it->second);
111 //if the user has not given a path then, add inputdir. else leave path alone.
112 if (path == "") { parameters["design"] = inputDir + it->second; }
115 it = parameters.find("phylip");
116 //user has given a template file
117 if(it != parameters.end()){
118 path = m->hasPath(it->second);
119 //if the user has not given a path then, add inputdir. else leave path alone.
120 if (path == "") { parameters["phylip"] = inputDir + it->second; }
124 phylipFileName = validParameter.validFile(parameters, "phylip", true);
125 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
126 else if (phylipFileName == "not found") { phylipFileName = ""; }
127 else if (designFileName == "not found") {
129 m->mothurOut("You must provide an phylip file.");
130 m->mothurOutEndLine();
134 //check for required parameters
135 designFileName = validParameter.validFile(parameters, "design", true);
136 if (designFileName == "not open") { abort = true; }
137 else if (designFileName == "not found") {
139 m->mothurOut("You must provide an design file.");
140 m->mothurOutEndLine();
144 string temp = validParameter.validFile(parameters, "iters", false);
145 if (temp == "not found") { temp = "1000"; }
146 convert(temp, iters);
148 temp = validParameter.validFile(parameters, "alpha", false);
149 if (temp == "not found") { temp = "0.05"; }
150 convert(temp, experimentwiseAlpha);
154 catch(exception& e) {
155 m->errorOut(e, "HomovaCommand", "HomovaCommand");
160 //**********************************************************************************************************************
162 void HomovaCommand::help(){
164 m->mothurOut("Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n");
165 m->mothurOut("The homova command outputs a .homova file. \n");
166 m->mothurOut("The homova command parameters are phylip, iters, and alpha. The phylip and design parameters are required.\n");
167 m->mothurOut("The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n");
168 m->mothurOut("The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n");
169 m->mothurOut("The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n");
170 m->mothurOut("The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n");
171 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n\n");
173 catch(exception& e) {
174 m->errorOut(e, "HomovaCommand", "help");
179 //**********************************************************************************************************************
181 HomovaCommand::~HomovaCommand(){}
183 //**********************************************************************************************************************
185 int HomovaCommand::execute(){
188 if (abort == true) { if (calledHelp) { return 0; } return 2; }
191 designMap = new GroupMap(designFileName);
192 designMap->readDesignMap();
194 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
196 //read in distance matrix and square it
197 ReadPhylipVector readMatrix(phylipFileName);
198 vector<string> sampleNames = readMatrix.read(distanceMatrix);
200 for(int i=0;i<distanceMatrix.size();i++){
201 for(int j=0;j<i;j++){
202 distanceMatrix[i][j] *= distanceMatrix[i][j];
206 //link designMap to rows/columns in distance matrix
207 map<string, vector<int> > origGroupSampleMap;
208 for(int i=0;i<sampleNames.size();i++){
209 origGroupSampleMap[designMap->getGroup(sampleNames[i])].push_back(i);
211 int numGroups = origGroupSampleMap.size();
213 //create a new filename
215 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + "homova";
216 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
217 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
219 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin_values" << endl;
220 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin_values\n");
222 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
224 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
226 int numCombos = numGroups * (numGroups-1) / 2;
227 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
229 map<string, vector<int> >::iterator itA;
230 map<string, vector<int> >::iterator itB;
232 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
234 for(itB;itB!=origGroupSampleMap.end();itB++){
236 map<string, vector<int> > pairwiseGroupSampleMap;
237 pairwiseGroupSampleMap[itA->first] = itA->second;
238 pairwiseGroupSampleMap[itB->first] = itB->second;
240 runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
244 m->mothurOutEndLine();
246 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
247 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
250 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
253 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
257 m->mothurOutEndLine();
258 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
259 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
260 m->mothurOutEndLine();
264 catch(exception& e) {
265 m->errorOut(e, "HomovaCommand", "execute");
270 //**********************************************************************************************************************
272 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
274 map<string, vector<int> >::iterator it;
275 int numGroups = groupSampleMap.size();
277 vector<double> ssWithinOrigVector;
278 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
281 for(int i=0;i<iters;i++){
282 vector<double> ssWithinRandVector;
283 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
284 double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
285 if(bValueRand > bValueOrig){ counter++; }
288 double pValue = (double) counter / (double) iters;
290 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
291 else { pString = toString(pValue); }
295 it = groupSampleMap.begin();
296 HOMOVAFile << it->first;
297 m->mothurOut(it->first);
299 for(it;it!=groupSampleMap.end();it++){
300 HOMOVAFile << '-' << it->first;
301 m->mothurOut('-' + it->first);
304 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
305 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
312 for(int i=0;i<numGroups;i++){
313 HOMOVAFile << '\t' << ssWithinOrigVector[i];
314 m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
317 m->mothurOutEndLine();
321 catch(exception& e) {
322 m->errorOut(e, "HomovaCommand", "runHOMOVA");
327 //**********************************************************************************************************************
329 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
331 double ssWithin = 0.0;
332 int numSamplesInGroup = sampleIndices.size();
334 for(int i=0;i<numSamplesInGroup;i++){
335 int row = sampleIndices[i];
337 for(int j=0;j<numSamplesInGroup;j++){
338 int col = sampleIndices[j];
341 ssWithin += distanceMatrix[row][col];
347 ssWithin /= numSamplesInGroup;
350 catch(exception& e) {
351 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
356 //**********************************************************************************************************************
358 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
361 map<string, vector<int> >::iterator it;
363 double numGroups = (double)groupSampleMap.size();
364 ssWithinVector.resize(numGroups, 0);
366 double totalNumSamples = 0;
368 double secondTermSum = 0;
369 double inverseOneMinusSum = 0;
372 ssWithinVector.resize(numGroups, 0);
373 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
374 int numSamplesInGroup = it->second.size();
375 totalNumSamples += numSamplesInGroup;
377 ssWithinVector[index] = calcSigleSSWithin(it->second);
378 ssWithinFull += ssWithinVector[index];
380 secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
381 inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
385 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
386 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
392 catch(exception& e) {
393 m->errorOut(e, "HomovaCommand", "calcBValue");
398 //**********************************************************************************************************************
400 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
402 vector<int> sampleIndices;
403 vector<int> samplesPerGroup;
405 map<string, vector<int> >::iterator it;
406 for(it=origMapping.begin();it!=origMapping.end();it++){
407 vector<int> indices = it->second;
408 samplesPerGroup.push_back(indices.size());
409 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
412 random_shuffle(sampleIndices.begin(), sampleIndices.end());
415 map<string, vector<int> > randomizedGroups = origMapping;
416 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
417 for(int i=0;i<it->second.size();i++){
418 it->second[i] = sampleIndices[index++];
422 return randomizedGroups;
424 catch (exception& e) {
425 m->errorOut(e, "AmovaCommand", "randomizeGroups");
430 //**********************************************************************************************************************