2 * File: kruskalwalliscommand.cpp
5 * Created on June 26, 2012, 11:06 AM
8 #include "kruskalwalliscommand.h"
9 #include "linearalgebra.h"
11 //**********************************************************************************************************************
12 vector<string> KruskalWallisCommand::setParameters(){
14 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
15 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
16 CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
19 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
20 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
21 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23 vector<string> myArray;
24 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
28 m->errorOut(e, "KruskalWallisCommand", "setParameters");
32 //**********************************************************************************************************************
33 string KruskalWallisCommand::getHelpString(){
35 string helpString = "";
36 helpString += "The kruskal.wallis command allows you to ....\n";
37 helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n";
38 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
39 helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
40 helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
41 helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
45 m->errorOut(e, "KruskalWallisCommand", "getHelpString");
49 //**********************************************************************************************************************
50 string KruskalWallisCommand::getOutputPattern(string type) {
54 if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
55 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
60 m->errorOut(e, "KruskalWallisCommand", "getOutputPattern");
64 //**********************************************************************************************************************
65 KruskalWallisCommand::KruskalWallisCommand(){
67 abort = true; calledHelp = true;
69 vector<string> tempOutNames;
70 outputTypes["kruskall-wallis"] = tempOutNames;
73 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
77 //**********************************************************************************************************************
78 KruskalWallisCommand::KruskalWallisCommand(string option) {
80 abort = false; calledHelp = false;
83 //allow user to run help
84 if(option == "help") { help(); abort = true; calledHelp = true; }
85 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
88 //valid paramters for this command
89 vector<string> myArray = setParameters();
91 OptionParser parser(option);
92 map<string,string> parameters = parser.getParameters();
94 ValidParameters validParameter;
95 map<string,string>::iterator it;
96 //check to make sure all parameters are valid for command
97 for (it = parameters.begin(); it != parameters.end(); it++) {
98 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
101 vector<string> tempOutNames;
102 outputTypes["kruskall-wallis"] = tempOutNames;
104 //if the user changes the input directory command factory will send this info to us in the output parameter
105 string inputDir = validParameter.validFile(parameters, "inputdir", false);
106 if (inputDir == "not found"){ inputDir = ""; }
110 it = parameters.find("design");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["desing"] = inputDir + it->second; }
118 it = parameters.find("shared");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["shared"] = inputDir + it->second; }
127 //get shared file, it is required
128 sharedfile = validParameter.validFile(parameters, "shared", true);
129 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
130 else if (sharedfile == "not found") {
131 //if there is a current shared file, use it
132 sharedfile = m->getSharedFile();
133 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
134 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
135 }else { m->setSharedFile(sharedfile); }
137 //get shared file, it is required
138 designfile = validParameter.validFile(parameters, "design", true);
139 if (designfile == "not open") { designfile = ""; abort = true; }
140 else if (designfile == "not found") {
141 //if there is a current shared file, use it
142 designfile = m->getDesignFile();
143 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
144 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
145 }else { m->setDesignFile(designfile); }
147 //if the user changes the output directory command factory will send this info to us in the output parameter
148 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
149 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
152 string label = validParameter.validFile(parameters, "label", false);
153 if (label == "not found") { label = ""; }
155 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
156 else { allLines = 1; }
159 mclass = validParameter.validFile(parameters, "class", false);
160 if (mclass == "not found") { mclass = ""; }
162 classes = validParameter.validFile(parameters, "classes", false);
163 if (classes == "not found") { classes = ""; }
168 catch(exception& e) {
169 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
173 //**********************************************************************************************************************
175 int KruskalWallisCommand::execute(){
178 if (abort == true) { if (calledHelp) { return 0; } return 2; }
180 DesignMap designMap(designfile);
181 //if user set classes set groups=those classes
183 map<string, vector<string> > thisClasses = m->parseClasses(classes);
184 vector<string> groups = designMap.getNamesUnique(thisClasses);
185 if (groups.size() != 0) { m->setGroups(groups); }
186 else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
189 //if user did not select class use first column
190 if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
192 InputData input(sharedfile, "sharedfile");
193 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
194 string lastLabel = lookup[0]->getLabel();
196 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
197 set<string> processedLabels;
198 set<string> userLabels = labels;
201 //as long as you are not at the end of the file or done wih the lines you want
202 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
204 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
206 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
208 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
210 process(lookup, designMap);
212 processedLabels.insert(lookup[0]->getLabel());
213 userLabels.erase(lookup[0]->getLabel());
216 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
217 string saveLabel = lookup[0]->getLabel();
219 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
220 lookup = input.getSharedRAbundVectors(lastLabel);
221 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
223 process(lookup, designMap);
225 processedLabels.insert(lookup[0]->getLabel());
226 userLabels.erase(lookup[0]->getLabel());
228 //restore real lastlabel to save below
229 lookup[0]->setLabel(saveLabel);
232 lastLabel = lookup[0]->getLabel();
233 //prevent memory leak
234 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
236 if (m->control_pressed) { return 0; }
238 //get next line to process
239 lookup = input.getSharedRAbundVectors();
242 if (m->control_pressed) { return 0; }
244 //output error messages about any remaining user labels
245 set<string>::iterator it;
246 bool needToRun = false;
247 for (it = userLabels.begin(); it != userLabels.end(); it++) {
248 m->mothurOut("Your file does not include the label " + *it);
249 if (processedLabels.count(lastLabel) != 1) {
250 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
253 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
257 //run last label if you need to
258 if (needToRun == true) {
259 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
260 lookup = input.getSharedRAbundVectors(lastLabel);
262 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263 process(lookup, designMap);
265 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
269 //output files created by command
270 m->mothurOutEndLine();
271 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
273 m->mothurOutEndLine();
277 catch(exception& e) {
278 m->errorOut(e, "KruskalWallisCommand", "execute");
282 //**********************************************************************************************************************
284 int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
286 map<string, string> variables;
287 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
288 variables["[distance]"] = lookup[0]->getLabel();
289 string outputFileName = getOutputFileName("kruskall-wallis",variables);
292 m->openOutputFile(outputFileName, out);
293 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
294 out << "OTULabel\tKW\tPvalue\n";
296 int numBins = lookup[0]->getNumBins();
297 //sanity check to make sure each treatment has a group in the shared file
298 set<string> treatments;
299 for (int j = 0; j < lookup.size(); j++) {
300 string group = lookup[j]->getGroup();
301 string treatment = designMap.get(group, mclass); //get value for this group in this category
302 treatments.insert(treatment);
304 if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
306 LinearAlgebra linear;
307 for (int i = 0; i < numBins; i++) {
308 if (m->control_pressed) { break; }
310 vector<spearmanRank> values;
311 for (int j = 0; j < lookup.size(); j++) {
312 string group = lookup[j]->getGroup();
313 string treatment = designMap.get(group, mclass); //get value for this group in this category
314 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
315 values.push_back(temp);
319 double H = linear.calcKruskalWallis(values, pValue);
321 //output H and signifigance
322 out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
328 catch(exception& e) {
329 m->errorOut(e, "KruskalWallisCommand", "process");
333 //**********************************************************************************************************************