5 * Created by westcott on 2/8/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "homovacommand.h"
12 #include "readphylipvector.h"
13 #include "sharedutilities.h"
15 //**********************************************************************************************************************
16 vector<string> HomovaCommand::setParameters(){
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pphylip);
20 CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
21 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
22 CommandParameter palpha("alpha", "Number", "", "0.05", "", "", "",false,false); parameters.push_back(palpha);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "HomovaCommand", "setParameters");
35 //**********************************************************************************************************************
36 string HomovaCommand::getHelpString(){
38 string helpString = "";
39 helpString += "Referenced: Stewart CN, Excoffier L (1996). Assessing population genetic structure and variability with RAPD data: Application to Vaccinium macrocarpon (American Cranberry). J Evol Biol 9: 153-71.\n";
40 helpString += "The homova command outputs a .homova file. \n";
41 helpString += "The homova command parameters are phylip, iters, sets and alpha. The phylip and design parameters are required, unless valid current files exist.\n";
42 helpString += "The design parameter allows you to assign your samples to groups when you are running homova. It is required. \n";
43 helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the sample name and the second column is the group the sample belongs to.\n";
44 helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
45 helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000. \n";
46 helpString += "The homova command should be in the following format: homova(phylip=file.dist, design=file.design).\n";
47 helpString += "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e. 1000).\n";
51 m->errorOut(e, "HomovaCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string HomovaCommand::getOutputFileNameTag(string type, string inputName=""){
58 string outputFileName = "";
59 map<string, vector<string> >::iterator it;
61 //is this a type this command creates
62 it = outputTypes.find(type);
63 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65 if (type == "homova") { outputFileName = "homova"; }
66 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
68 return outputFileName;
71 m->errorOut(e, "HomovaCommand", "getOutputFileNameTag");
75 //**********************************************************************************************************************
76 HomovaCommand::HomovaCommand(){
78 abort = true; calledHelp = true;
80 vector<string> tempOutNames;
81 outputTypes["homova"] = tempOutNames;
84 m->errorOut(e, "HomovaCommand", "HomovaCommand");
88 //**********************************************************************************************************************
90 HomovaCommand::HomovaCommand(string option) {
92 abort = false; calledHelp = false;
94 //allow user to run help
95 if(option == "help") { help(); abort = true; calledHelp = true; }
96 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
99 vector<string> myArray = setParameters();
101 OptionParser parser(option);
102 map<string,string> parameters = parser.getParameters();
104 ValidParameters validParameter;
106 //check to make sure all parameters are valid for command
107 map<string,string>::iterator it;
108 for (it = parameters.begin(); it != parameters.end(); it++) {
109 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
112 //initialize outputTypes
113 vector<string> tempOutNames;
114 outputTypes["homova"] = tempOutNames;
116 //if the user changes the output directory command factory will send this info to us in the output parameter
117 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
119 //if the user changes the input directory command factory will send this info to us in the output parameter
120 string inputDir = validParameter.validFile(parameters, "inputdir", false);
121 if (inputDir == "not found"){ inputDir = ""; }
124 it = parameters.find("design");
125 //user has given a template file
126 if(it != parameters.end()){
127 path = m->hasPath(it->second);
128 //if the user has not given a path then, add inputdir. else leave path alone.
129 if (path == "") { parameters["design"] = inputDir + it->second; }
132 it = parameters.find("phylip");
133 //user has given a template file
134 if(it != parameters.end()){
135 path = m->hasPath(it->second);
136 //if the user has not given a path then, add inputdir. else leave path alone.
137 if (path == "") { parameters["phylip"] = inputDir + it->second; }
141 phylipFileName = validParameter.validFile(parameters, "phylip", true);
142 if (phylipFileName == "not open") { phylipFileName = ""; abort = true; }
143 else if (phylipFileName == "not found") {
144 //if there is a current phylip file, use it
145 phylipFileName = m->getPhylipFile();
146 if (phylipFileName != "") { m->mothurOut("Using " + phylipFileName + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
147 else { m->mothurOut("You have no current phylip file and the phylip parameter is required."); m->mothurOutEndLine(); abort = true; }
149 }else { m->setPhylipFile(phylipFileName); }
151 //check for required parameters
152 designFileName = validParameter.validFile(parameters, "design", true);
153 if (designFileName == "not open") { abort = true; }
154 else if (designFileName == "not found") {
155 //if there is a current design file, use it
156 designFileName = m->getDesignFile();
157 if (designFileName != "") { m->mothurOut("Using " + designFileName + " as input file for the design parameter."); m->mothurOutEndLine(); }
158 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
159 }else { m->setDesignFile(designFileName); }
161 string temp = validParameter.validFile(parameters, "iters", false);
162 if (temp == "not found") { temp = "1000"; }
163 m->mothurConvert(temp, iters);
165 temp = validParameter.validFile(parameters, "alpha", false);
166 if (temp == "not found") { temp = "0.05"; }
167 m->mothurConvert(temp, experimentwiseAlpha);
169 string sets = validParameter.validFile(parameters, "sets", false);
170 if (sets == "not found") { sets = ""; }
172 m->splitAtDash(sets, Sets);
177 catch(exception& e) {
178 m->errorOut(e, "HomovaCommand", "HomovaCommand");
182 //**********************************************************************************************************************
184 int HomovaCommand::execute(){
187 if (abort == true) { if (calledHelp) { return 0; } return 2; }
190 designMap = new GroupMap(designFileName);
191 designMap->readDesignMap();
193 if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
195 //read in distance matrix and square it
196 ReadPhylipVector readMatrix(phylipFileName);
197 vector<string> sampleNames = readMatrix.read(distanceMatrix);
199 if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
201 vector<string> dGroups = designMap->getNamesOfGroups();
202 util.setGroups(Sets, dGroups);
204 for(int i=0;i<distanceMatrix.size();i++){
206 if (m->control_pressed) { delete designMap; return 0; }
208 string group = designMap->getGroup(sampleNames[i]);
210 if (group == "not found") {
211 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
212 }else if (!m->inUsersGroups(group, Sets)){ //not in set we want remove it
213 //remove from all other rows
214 for(int j=0;j<distanceMatrix.size();j++){
215 distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
217 distanceMatrix.erase(distanceMatrix.begin()+i);
218 sampleNames.erase(sampleNames.begin()+i);
224 for(int i=0;i<distanceMatrix.size();i++){
225 for(int j=0;j<i;j++){
226 distanceMatrix[i][j] *= distanceMatrix[i][j];
230 //link designMap to rows/columns in distance matrix
231 map<string, vector<int> > origGroupSampleMap;
232 for(int i=0;i<sampleNames.size();i++){
233 string group = designMap->getGroup(sampleNames[i]);
235 if (group == "not found") {
236 m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
237 }else { origGroupSampleMap[group].push_back(i); }
239 int numGroups = origGroupSampleMap.size();
241 if (m->control_pressed) { delete designMap; return 0; }
243 //create a new filename
245 string HOMOVAFileName = outputDir + m->getRootName(m->getSimpleName(phylipFileName)) + getOutputFileNameTag("homova");
246 m->openOutputFile(HOMOVAFileName, HOMOVAFile);
247 outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
249 HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
250 m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
252 double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);
254 if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
256 int numCombos = numGroups * (numGroups-1) / 2;
257 double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
259 map<string, vector<int> >::iterator itA;
260 map<string, vector<int> >::iterator itB;
262 for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
264 for(;itB!=origGroupSampleMap.end();itB++){
265 map<string, vector<int> > pairwiseGroupSampleMap;
266 pairwiseGroupSampleMap[itA->first] = itA->second;
267 pairwiseGroupSampleMap[itB->first] = itB->second;
269 runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
273 m->mothurOutEndLine();
275 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
276 m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
279 m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
282 m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
286 m->mothurOutEndLine();
287 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
289 m->mothurOutEndLine();
293 catch(exception& e) {
294 m->errorOut(e, "HomovaCommand", "execute");
299 //**********************************************************************************************************************
301 double HomovaCommand::runHOMOVA(ofstream& HOMOVAFile, map<string, vector<int> > groupSampleMap, double alpha){
303 map<string, vector<int> >::iterator it;
304 int numGroups = groupSampleMap.size();
306 vector<double> ssWithinOrigVector;
307 double bValueOrig = calcBValue(groupSampleMap, ssWithinOrigVector);
310 for(int i=0;i<iters;i++){
311 vector<double> ssWithinRandVector;
312 map<string, vector<int> > randomizedGroup = getRandomizedGroups(groupSampleMap);
313 double bValueRand = calcBValue(randomizedGroup, ssWithinRandVector);
314 if(bValueRand > bValueOrig){ counter++; }
317 double pValue = (double) counter / (double) iters;
319 if(pValue < 1/(double)iters){ pString = '<' + toString(1/(double)iters); }
320 else { pString = toString(pValue); }
324 it = groupSampleMap.begin();
325 HOMOVAFile << it->first;
326 m->mothurOut(it->first);
328 for(;it!=groupSampleMap.end();it++){
329 HOMOVAFile << '-' << it->first;
330 m->mothurOut('-' + it->first);
333 HOMOVAFile << '\t' << bValueOrig << '\t' << pString;
334 m->mothurOut('\t' + toString(bValueOrig) + '\t' + pString);
341 for(int i=0;i<numGroups;i++){
342 HOMOVAFile << '\t' << ssWithinOrigVector[i];
343 m->mothurOut('\t' + toString(ssWithinOrigVector[i]));
346 m->mothurOutEndLine();
350 catch(exception& e) {
351 m->errorOut(e, "HomovaCommand", "runHOMOVA");
356 //**********************************************************************************************************************
358 double HomovaCommand::calcSigleSSWithin(vector<int> sampleIndices) {
360 double ssWithin = 0.0;
361 int numSamplesInGroup = sampleIndices.size();
363 for(int i=0;i<numSamplesInGroup;i++){
364 int row = sampleIndices[i];
366 for(int j=0;j<numSamplesInGroup;j++){
367 int col = sampleIndices[j];
370 ssWithin += distanceMatrix[row][col];
376 ssWithin /= numSamplesInGroup;
379 catch(exception& e) {
380 m->errorOut(e, "HomovaCommand", "calcSigleSSWithin");
385 //**********************************************************************************************************************
387 double HomovaCommand::calcBValue(map<string, vector<int> > groupSampleMap, vector<double>& ssWithinVector) {
390 map<string, vector<int> >::iterator it;
392 double numGroups = (double)groupSampleMap.size();
393 ssWithinVector.resize(numGroups, 0);
395 double totalNumSamples = 0;
397 double secondTermSum = 0;
398 double inverseOneMinusSum = 0;
401 ssWithinVector.resize(numGroups, 0);
402 for(it = groupSampleMap.begin();it!=groupSampleMap.end();it++){
403 int numSamplesInGroup = it->second.size();
404 totalNumSamples += numSamplesInGroup;
406 ssWithinVector[index] = calcSigleSSWithin(it->second);
407 ssWithinFull += ssWithinVector[index];
409 secondTermSum += (numSamplesInGroup - 1) * log(ssWithinVector[index] / (double)(numSamplesInGroup - 1));
410 inverseOneMinusSum += 1.0 / (double)(numSamplesInGroup - 1);
412 ssWithinVector[index] /= (double)(numSamplesInGroup - 1); //this line is only for output purposes to scale SSw by the number of samples in the group
416 double B = (totalNumSamples - numGroups) * log(ssWithinFull/(totalNumSamples-numGroups)) - secondTermSum;
417 double denomintor = 1 + 1.0/(3.0 * (numGroups - 1.0)) * (inverseOneMinusSum - 1.0 / (double) (totalNumSamples - numGroups));
423 catch(exception& e) {
424 m->errorOut(e, "HomovaCommand", "calcBValue");
429 //**********************************************************************************************************************
431 map<string, vector<int> > HomovaCommand::getRandomizedGroups(map<string, vector<int> > origMapping){
433 vector<int> sampleIndices;
434 vector<int> samplesPerGroup;
436 map<string, vector<int> >::iterator it;
437 for(it=origMapping.begin();it!=origMapping.end();it++){
438 vector<int> indices = it->second;
439 samplesPerGroup.push_back(indices.size());
440 sampleIndices.insert(sampleIndices.end(), indices.begin(), indices.end());
443 random_shuffle(sampleIndices.begin(), sampleIndices.end());
446 map<string, vector<int> > randomizedGroups = origMapping;
447 for(it=randomizedGroups.begin();it!=randomizedGroups.end();it++){
448 for(int i=0;i<it->second.size();i++){
449 it->second[i] = sampleIndices[index++];
453 return randomizedGroups;
455 catch (exception& e) {
456 m->errorOut(e, "AmovaCommand", "randomizeGroups");
461 //**********************************************************************************************************************