]> git.donarmstrong.com Git - mothur.git/blob - kruskalwalliscommand.cpp
added kruskal.wallis command. added worked on make.lefse. working of lefse command...
[mothur.git] / kruskalwalliscommand.cpp
1 /* 
2  * File:   kruskalwalliscommand.cpp
3  * Author: kiverson
4  *
5  * Created on June 26, 2012, 11:06 AM
6  */
7
8 #include "kruskalwalliscommand.h"
9 #include "linearalgebra.h"
10
11 //**********************************************************************************************************************
12 vector<string> KruskalWallisCommand::setParameters(){
13         try {
14         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
15         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
16         CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18                 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
19         //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
20                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
21                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
22                 
23                 vector<string> myArray;
24                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
25                 return myArray;
26         }
27         catch(exception& e) {
28                 m->errorOut(e, "KruskalWallisCommand", "setParameters");
29                 exit(1);
30         }
31 }
32 //**********************************************************************************************************************
33 string KruskalWallisCommand::getHelpString(){
34         try {
35                 string helpString = "";
36                 helpString += "The kruskal.wallis command allows you to ....\n";
37                 helpString += "The kruskal.wallis command parameters are: shared, design, class,  label and classes.\n";
38                 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
39         helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
40         helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
41                 helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
42         return helpString;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "KruskalWallisCommand", "getHelpString");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50 string KruskalWallisCommand::getOutputPattern(string type) {
51     try {
52         string pattern = "";
53         
54         if (type == "kruskall-wallis") {  pattern = "[filename],[distance],kruskall_wallis"; }
55         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
56         
57         return pattern;
58     }
59     catch(exception& e) {
60         m->errorOut(e, "KruskalWallisCommand", "getOutputPattern");
61         exit(1);
62     }
63 }
64 //**********************************************************************************************************************
65 KruskalWallisCommand::KruskalWallisCommand(){
66         try {
67                 abort = true; calledHelp = true;
68                 setParameters();
69         vector<string> tempOutNames;
70         outputTypes["kruskall-wallis"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 KruskalWallisCommand::KruskalWallisCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;
81         allLines = 1;
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         //valid paramters for this command
89                         vector<string> myArray = setParameters();
90                         
91                         OptionParser parser(option);
92                         map<string,string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         map<string,string>::iterator it;
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) {
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         vector<string> tempOutNames;
102             outputTypes["kruskall-wallis"] = tempOutNames;
103             
104                         //if the user changes the input directory command factory will send this info to us in the output parameter
105                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
106                         if (inputDir == "not found"){   inputDir = "";          }
107                         else {
108                 
109                 string path;
110                                 it = parameters.find("design");
111                                 //user has given a template file
112                                 if(it != parameters.end()){
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["desing"] = inputDir + it->second;           }
116                                 }
117                                 
118                 it = parameters.find("shared");
119                                 //user has given a template file
120                                 if(it != parameters.end()){
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
124                                 }
125             }
126             
127             //get shared file, it is required
128                         sharedfile = validParameter.validFile(parameters, "shared", true);
129                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
130                         else if (sharedfile == "not found") {
131                                 //if there is a current shared file, use it
132                                 sharedfile = m->getSharedFile();
133                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
134                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
135                         }else { m->setSharedFile(sharedfile); }
136             
137             //get shared file, it is required
138                         designfile = validParameter.validFile(parameters, "design", true);
139                         if (designfile == "not open") { designfile = ""; abort = true; }
140                         else if (designfile == "not found") {
141                                 //if there is a current shared file, use it
142                                 designfile = m->getDesignFile();
143                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
144                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
145                         }else { m->setDesignFile(designfile); }
146             
147             //if the user changes the output directory command factory will send this info to us in the output parameter
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
149                                 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
150                         }
151             
152             string label = validParameter.validFile(parameters, "label", false);
153                         if (label == "not found") { label = ""; }
154                         else {
155                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
156                                 else { allLines = 1;  }
157                         }
158             
159             mclass = validParameter.validFile(parameters, "class", false);
160                         if (mclass == "not found") { mclass = ""; }
161             
162             classes = validParameter.validFile(parameters, "classes", false);
163                         if (classes == "not found") { classes = ""; }
164             
165                 }
166                 
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
170                 exit(1);
171         }
172 }
173 //**********************************************************************************************************************
174
175 int KruskalWallisCommand::execute(){
176         try {
177                 
178                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
179         
180         DesignMap designMap(designfile);
181         //if user set classes set groups=those classes
182         if (classes != "") {
183             map<string, vector<string> > thisClasses = m->parseClasses(classes);
184             vector<string> groups = designMap.getNamesUnique(thisClasses);
185             if (groups.size() != 0) { m->setGroups(groups); }
186             else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
187         }
188         
189         //if user did not select class use first column
190         if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
191         
192         InputData input(sharedfile, "sharedfile");
193         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
194         string lastLabel = lookup[0]->getLabel();
195         
196         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
197         set<string> processedLabels;
198         set<string> userLabels = labels;
199         
200         
201         //as long as you are not at the end of the file or done wih the lines you want
202         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
203             
204             if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
205             
206             if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
207                 
208                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
209                 
210                 process(lookup, designMap);
211                 
212                 processedLabels.insert(lookup[0]->getLabel());
213                 userLabels.erase(lookup[0]->getLabel());
214             }
215             
216             if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
217                 string saveLabel = lookup[0]->getLabel();
218                 
219                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
220                 lookup = input.getSharedRAbundVectors(lastLabel);
221                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
222                 
223                 process(lookup, designMap);
224                 
225                 processedLabels.insert(lookup[0]->getLabel());
226                 userLabels.erase(lookup[0]->getLabel());
227                 
228                 //restore real lastlabel to save below
229                 lookup[0]->setLabel(saveLabel);
230             }
231             
232             lastLabel = lookup[0]->getLabel();
233             //prevent memory leak
234             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
235             
236             if (m->control_pressed) { return 0; }
237             
238             //get next line to process
239             lookup = input.getSharedRAbundVectors();
240         }
241         
242         if (m->control_pressed) {  return 0; }
243         
244         //output error messages about any remaining user labels
245         set<string>::iterator it;
246         bool needToRun = false;
247         for (it = userLabels.begin(); it != userLabels.end(); it++) {
248             m->mothurOut("Your file does not include the label " + *it);
249             if (processedLabels.count(lastLabel) != 1) {
250                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
251                 needToRun = true;
252             }else {
253                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
254             }
255         }
256         
257         //run last label if you need to
258         if (needToRun == true)  {
259             for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
260             lookup = input.getSharedRAbundVectors(lastLabel);
261             
262             m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263             process(lookup, designMap);
264             
265             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
266         }
267         
268                 
269         //output files created by command
270                 m->mothurOutEndLine();
271                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
272                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
273                 m->mothurOutEndLine();
274         return 0;
275                 
276     }
277         catch(exception& e) {
278                 m->errorOut(e, "KruskalWallisCommand", "execute");
279                 exit(1);
280         }
281 }
282 //**********************************************************************************************************************
283
284 int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
285         try {
286         map<string, string> variables;
287         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
288         variables["[distance]"] = lookup[0]->getLabel();
289                 string outputFileName = getOutputFileName("kruskall-wallis",variables);
290         
291                 ofstream out;
292                 m->openOutputFile(outputFileName, out);
293                 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
294         out << "OTULabel\tKW\tPvalue\n";
295         
296         int numBins = lookup[0]->getNumBins();
297         //sanity check to make sure each treatment has a group in the shared file
298         set<string> treatments;
299         for (int j = 0; j < lookup.size(); j++) {
300             string group = lookup[j]->getGroup();
301             string treatment = designMap.get(group, mclass); //get value for this group in this category
302             treatments.insert(treatment);
303         }
304         if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
305         
306         LinearAlgebra linear;
307         for (int i = 0; i < numBins; i++) {
308             if (m->control_pressed) { break; }
309             
310             vector<spearmanRank> values;
311             for (int j = 0; j < lookup.size(); j++) {
312                 string group = lookup[j]->getGroup();
313                 string treatment = designMap.get(group, mclass); //get value for this group in this category
314                 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
315                 values.push_back(temp);
316             }
317             
318             double pValue = 0.0;
319             double H = linear.calcKruskalWallis(values, pValue);
320             
321             //output H and signifigance
322             out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
323         }
324         out.close();
325                 
326         return 0;
327     }
328         catch(exception& e) {
329                 m->errorOut(e, "KruskalWallisCommand", "process");
330                 exit(1);
331         }
332 }
333 //**********************************************************************************************************************
334
335