5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
12 #include "readphylipvector.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> HomovaCommand::setParameters(){
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pdesign);
19 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none","homova",false,true,true); parameters.push_back(pphylip);
20 CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
22 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "HomovaCommand", "setParameters");
35 //**********************************************************************************************************************
36 string HomovaCommand::getHelpString(){
38 string helpString = "";
39 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
40 helpString += "The homova command outputs a .homova file. \n";
41 helpString += "The homova command parameters are phylip, iters, sets and alpha. The phylip and design parameters are required, unless valid current files exist.\n";
42 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
43 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
44 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
45 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n";
46 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
47 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
51 m->errorOut(e, "HomovaCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string HomovaCommand::getOutputPattern(string type) {
60 if (type == "homova") { pattern = "[filename],homova"; }
61 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
66 m->errorOut(e, "HomovaCommand", "getOutputPattern");
70 //**********************************************************************************************************************
71 HomovaCommand::HomovaCommand(){
73 abort = true; calledHelp = true;
75 vector<string> tempOutNames;
76 outputTypes["homova"] = tempOutNames;
79 m->errorOut(e, "HomovaCommand", "HomovaCommand");
83 //**********************************************************************************************************************
85 HomovaCommand::HomovaCommand(string option) {
87 abort = false; calledHelp = false;
89 //allow user to run help
90 if(option == "help") { help(); abort = true; calledHelp = true; }
91 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
101 //check to make sure all parameters are valid for command
102 map<string,string>::iterator it;
103 for (it = parameters.begin(); it != parameters.end(); it++) {
104 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
107 //initialize outputTypes
108 vector<string> tempOutNames;
109 outputTypes["homova"] = tempOutNames;
111 //if the user changes the output directory command factory will send this info to us in the output parameter
112 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
114 //if the user changes the input directory command factory will send this info to us in the output parameter
115 string inputDir = validParameter.validFile(parameters, "inputdir", false);
116 if (inputDir == "not found"){ inputDir = ""; }
119 it = parameters.find("design");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["design"] = inputDir + it->second; }
127 it = parameters.find("phylip");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["phylip"] = inputDir + it->second; }
136 phylipFileName = validParameter.validFile(parameters, "phylip", true);
137 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
138 else if (phylipFileName == "not found") {
139 //if there is a current phylip file, use it
140 phylipFileName = m->getPhylipFile();
141 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
142 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
144 }else { m->setPhylipFile(phylipFileName); }
146 //check for required parameters
147 designFileName = validParameter.validFile(parameters, "design", true);
148 if (designFileName == "not open") { abort = true; }
149 else if (designFileName == "not found") {
150 //if there is a current design file, use it
151 designFileName = m->getDesignFile();
152 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
153 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
154 }else { m->setDesignFile(designFileName); }
156 string temp = validParameter.validFile(parameters, "iters", false);
157 if (temp == "not found") { temp = "1000"; }
158 m->mothurConvert(temp, iters);
160 temp = validParameter.validFile(parameters, "alpha", false);
161 if (temp == "not found") { temp = "0.05"; }
162 m->mothurConvert(temp, experimentwiseAlpha);
164 string sets = validParameter.validFile(parameters, "sets", false);
165 if (sets == "not found") { sets = ""; }
167 m->splitAtDash(sets, Sets);
172 catch(exception& e) {
173 m->errorOut(e, "HomovaCommand", "HomovaCommand");
177 //**********************************************************************************************************************
179 int HomovaCommand::execute(){
182 if (abort == true) { if (calledHelp) { return 0; } return 2; }
185 designMap = new GroupMap(designFileName);
186 designMap->readDesignMap();
188 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
190 //read in distance matrix and square it
191 ReadPhylipVector readMatrix(phylipFileName);
192 vector<string> sampleNames = readMatrix.read(distanceMatrix);
194 if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
196 vector<string> dGroups = designMap->getNamesOfGroups();
197 util.setGroups(Sets, dGroups);
199 for(int i=0;i<distanceMatrix.size();i++){
201 if (m->control_pressed) { delete designMap; return 0; }
203 string group = designMap->getGroup(sampleNames[i]);
205 if (group == "not found") {
206 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
207 }else if (!m->inUsersGroups(group, Sets)){ //not in set we want remove it
208 //remove from all other rows
209 for(int j=0;j<distanceMatrix.size();j++){
210 distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
212 distanceMatrix.erase(distanceMatrix.begin()+i);
213 sampleNames.erase(sampleNames.begin()+i);
219 for(int i=0;i<distanceMatrix.size();i++){
220 for(int j=0;j<i;j++){
221 distanceMatrix[i][j] *= distanceMatrix[i][j];
225 //link designMap to rows/columns in distance matrix
226 map<string, vector<int> > origGroupSampleMap;
227 for(int i=0;i<sampleNames.size();i++){
228 string group = designMap->getGroup(sampleNames[i]);
230 if (group == "not found") {
231 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
232 }else { origGroupSampleMap[group].push_back(i); }
234 int numGroups = origGroupSampleMap.size();
236 if (m->control_pressed) { delete designMap; return 0; }
238 //create a new filename
240 map<string, string> variables;
241 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
242 string HOMOVAFileName = getOutputFileName("homova", variables);
243 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
244 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
246 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
247 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
249 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
251 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
253 int numCombos = numGroups * (numGroups-1) / 2;
254 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
256 map<string, vector<int> >::iterator itA;
257 map<string, vector<int> >::iterator itB;
259 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
261 for(;itB!=origGroupSampleMap.end();itB++){
262 map<string, vector<int> > pairwiseGroupSampleMap;
263 pairwiseGroupSampleMap[itA->first] = itA->second;
264 pairwiseGroupSampleMap[itB->first] = itB->second;
266 runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
270 m->mothurOutEndLine();
272 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
273 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
276 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
279 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
283 m->mothurOutEndLine();
284 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
285 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
286 m->mothurOutEndLine();
290 catch(exception& e) {
291 m->errorOut(e, "HomovaCommand", "execute");
296 //**********************************************************************************************************************
298 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
300 map<string, vector<int> >::iterator it;
301 int numGroups = groupSampleMap.size();
303 vector<double> ssWithinOrigVector;
304 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
307 for(int i=0;i<iters;i++){
308 vector<double> ssWithinRandVector;
309 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
310 double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
311 if(bValueRand >= bValueOrig){ counter++; }
314 double pValue = (double) counter / (double) iters;
316 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
317 else { pString = toString(pValue); }
321 it = groupSampleMap.begin();
322 HOMOVAFile << it->first;
323 m->mothurOut(it->first);
325 for(;it!=groupSampleMap.end();it++){
326 HOMOVAFile << '-' << it->first;
327 m->mothurOut('-' + it->first);
330 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
331 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
338 for(int i=0;i<numGroups;i++){
339 HOMOVAFile << '\t' << ssWithinOrigVector[i];
340 m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
343 m->mothurOutEndLine();
347 catch(exception& e) {
348 m->errorOut(e, "HomovaCommand", "runHOMOVA");
353 //**********************************************************************************************************************
355 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
357 double ssWithin = 0.0;
358 int numSamplesInGroup = sampleIndices.size();
360 for(int i=0;i<numSamplesInGroup;i++){
361 int row = sampleIndices[i];
363 for(int j=0;j<numSamplesInGroup;j++){
364 int col = sampleIndices[j];
367 ssWithin += distanceMatrix[row][col];
373 ssWithin /= numSamplesInGroup;
376 catch(exception& e) {
377 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
382 //**********************************************************************************************************************
384 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
387 map<string, vector<int> >::iterator it;
389 double numGroups = (double)groupSampleMap.size();
390 ssWithinVector.resize(numGroups, 0);
392 double totalNumSamples = 0;
394 double secondTermSum = 0;
395 double inverseOneMinusSum = 0;
398 ssWithinVector.resize(numGroups, 0);
399 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
400 int numSamplesInGroup = it->second.size();
401 totalNumSamples += numSamplesInGroup;
403 ssWithinVector[index] = calcSigleSSWithin(it->second);
404 ssWithinFull += ssWithinVector[index];
406 secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
407 inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
409 ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
413 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
414 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
420 catch(exception& e) {
421 m->errorOut(e, "HomovaCommand", "calcBValue");
426 //**********************************************************************************************************************
428 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
430 vector<int> sampleIndices;
431 vector<int> samplesPerGroup;
433 map<string, vector<int> >::iterator it;
434 for(it=origMapping.begin();it!=origMapping.end();it++){
435 vector<int> indices = it->second;
436 samplesPerGroup.push_back(indices.size());
437 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
440 random_shuffle(sampleIndices.begin(), sampleIndices.end());
443 map<string, vector<int> > randomizedGroups = origMapping;
444 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
445 for(int i=0;i<it->second.size();i++){
446 it->second[i] = sampleIndices[index++];
450 return randomizedGroups;
452 catch (exception& e) {
453 m->errorOut(e, "AmovaCommand", "randomizeGroups");
458 //**********************************************************************************************************************