]> git.donarmstrong.com Git - mothur.git/blob - kruskalwalliscommand.cpp
working on pam
[mothur.git] / kruskalwalliscommand.cpp
1 /* 
2  * File:   kruskalwalliscommand.cpp
3  * Author: kiverson
4  *
5  * Created on June 26, 2012, 11:06 AM
6  */
7
8 #include "kruskalwalliscommand.h"
9 #include "linearalgebra.h"
10
11 //**********************************************************************************************************************
12 vector<string> KruskalWallisCommand::setParameters(){
13         try {
14         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
15         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
16         CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
21                 
22                 vector<string> myArray;
23                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
24                 return myArray;
25         }
26         catch(exception& e) {
27                 m->errorOut(e, "KruskalWallisCommand", "setParameters");
28                 exit(1);
29         }
30 }
31 //**********************************************************************************************************************
32 string KruskalWallisCommand::getHelpString(){
33         try {
34                 string helpString = "";
35                 helpString += "The kruskal.wallis command allows you to ....\n";
36                 helpString += "The kruskal.wallis command parameters are: shared, design, class, label and classes.\n";
37                 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
38         helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
39                 helpString += "The kruskal.wallis command should be in the following format: kruskal.wallis(shared=final.an.shared, design=final.design, class=treatment).\n";
40         return helpString;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "KruskalWallisCommand", "getHelpString");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 string KruskalWallisCommand::getOutputPattern(string type) {
49     try {
50         string pattern = "";
51         
52         if (type == "kruskall-wallis") {  pattern = "[filename],[distance],kruskall_wallis"; }
53         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
54         
55         return pattern;
56     }
57     catch(exception& e) {
58         m->errorOut(e, "KruskalWallisCommand", "getOutputPattern");
59         exit(1);
60     }
61 }
62 //**********************************************************************************************************************
63 KruskalWallisCommand::KruskalWallisCommand(){
64         try {
65                 abort = true; calledHelp = true;
66                 setParameters();
67         vector<string> tempOutNames;
68         outputTypes["kruskall-wallis"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 KruskalWallisCommand::KruskalWallisCommand(string option)  {
77         try {
78                 abort = false; calledHelp = false;
79         allLines = 1;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         //valid paramters for this command
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                         //check to make sure all parameters are valid for command
95                         for (it = parameters.begin(); it != parameters.end(); it++) {
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         vector<string> tempOutNames;
100             outputTypes["kruskall-wallis"] = tempOutNames;
101             
102                         //if the user changes the input directory command factory will send this info to us in the output parameter
103                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
104                         if (inputDir == "not found"){   inputDir = "";          }
105                         else {
106                 
107                 string path;
108                                 it = parameters.find("design");
109                                 //user has given a template file
110                                 if(it != parameters.end()){
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["desing"] = inputDir + it->second;           }
114                                 }
115                                 
116                 it = parameters.find("shared");
117                                 //user has given a template file
118                                 if(it != parameters.end()){
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
122                                 }
123             }
124             
125             //get shared file, it is required
126                         sharedfile = validParameter.validFile(parameters, "shared", true);
127                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
128                         else if (sharedfile == "not found") {
129                                 //if there is a current shared file, use it
130                                 sharedfile = m->getSharedFile();
131                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
132                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
133                         }else { m->setSharedFile(sharedfile); }
134             
135             //get shared file, it is required
136                         designfile = validParameter.validFile(parameters, "design", true);
137                         if (designfile == "not open") { designfile = ""; abort = true; }
138                         else if (designfile == "not found") {
139                                 //if there is a current shared file, use it
140                                 designfile = m->getDesignFile();
141                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
142                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
143                         }else { m->setDesignFile(designfile); }
144             
145             //if the user changes the output directory command factory will send this info to us in the output parameter
146                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
147                                 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
148                         }
149             
150             string label = validParameter.validFile(parameters, "label", false);
151                         if (label == "not found") { label = ""; }
152                         else {
153                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
154                                 else { allLines = 1;  }
155                         }
156             
157             mclass = validParameter.validFile(parameters, "class", false);
158                         if (mclass == "not found") { mclass = ""; }
159             
160                 }
161                 
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169
170 int KruskalWallisCommand::execute(){
171         try {
172                 
173                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
174         
175         DesignMap designMap(designfile);
176         
177         //if user did not select class use first column
178         if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
179         
180         InputData input(sharedfile, "sharedfile");
181         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
182         string lastLabel = lookup[0]->getLabel();
183         
184         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
185         set<string> processedLabels;
186         set<string> userLabels = labels;
187         
188         
189         //as long as you are not at the end of the file or done wih the lines you want
190         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
191             
192             if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
193             
194             if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
195                 
196                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
197                 
198                 process(lookup, designMap);
199                 
200                 processedLabels.insert(lookup[0]->getLabel());
201                 userLabels.erase(lookup[0]->getLabel());
202             }
203             
204             if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
205                 string saveLabel = lookup[0]->getLabel();
206                 
207                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
208                 lookup = input.getSharedRAbundVectors(lastLabel);
209                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
210                 
211                 process(lookup, designMap);
212                 
213                 processedLabels.insert(lookup[0]->getLabel());
214                 userLabels.erase(lookup[0]->getLabel());
215                 
216                 //restore real lastlabel to save below
217                 lookup[0]->setLabel(saveLabel);
218             }
219             
220             lastLabel = lookup[0]->getLabel();
221             //prevent memory leak
222             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
223             
224             if (m->control_pressed) { return 0; }
225             
226             //get next line to process
227             lookup = input.getSharedRAbundVectors();
228         }
229         
230         if (m->control_pressed) {  return 0; }
231         
232         //output error messages about any remaining user labels
233         set<string>::iterator it;
234         bool needToRun = false;
235         for (it = userLabels.begin(); it != userLabels.end(); it++) {
236             m->mothurOut("Your file does not include the label " + *it);
237             if (processedLabels.count(lastLabel) != 1) {
238                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
239                 needToRun = true;
240             }else {
241                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
242             }
243         }
244         
245         //run last label if you need to
246         if (needToRun == true)  {
247             for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
248             lookup = input.getSharedRAbundVectors(lastLabel);
249             
250             m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
251             process(lookup, designMap);
252             
253             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
254         }
255         
256                 
257         //output files created by command
258                 m->mothurOutEndLine();
259                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
260                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
261                 m->mothurOutEndLine();
262         return 0;
263                 
264     }
265         catch(exception& e) {
266                 m->errorOut(e, "KruskalWallisCommand", "execute");
267                 exit(1);
268         }
269 }
270 //**********************************************************************************************************************
271
272 int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
273         try {
274         map<string, string> variables;
275         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
276         variables["[distance]"] = lookup[0]->getLabel();
277                 string outputFileName = getOutputFileName("kruskall-wallis",variables);
278         
279                 ofstream out;
280                 m->openOutputFile(outputFileName, out);
281                 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
282         out << "OTULabel\tKW\tPvalue\n";
283         
284         int numBins = lookup[0]->getNumBins();
285         //sanity check to make sure each treatment has a group in the shared file
286         set<string> treatments;
287         for (int j = 0; j < lookup.size(); j++) {
288             string group = lookup[j]->getGroup();
289             string treatment = designMap.get(group, mclass); //get value for this group in this category
290             treatments.insert(treatment);
291         }
292         if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
293         
294         LinearAlgebra linear;
295         for (int i = 0; i < numBins; i++) {
296             if (m->control_pressed) { break; }
297             
298             vector<spearmanRank> values;
299             for (int j = 0; j < lookup.size(); j++) {
300                 string group = lookup[j]->getGroup();
301                 string treatment = designMap.get(group, mclass); //get value for this group in this category
302                 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
303                 values.push_back(temp);
304             }
305             
306             double pValue = 0.0;
307             double H = linear.calcKruskalWallis(values, pValue);
308             
309             //output H and signifigance
310             out << m->currentSharedBinLabels[i] << '\t' << H << '\t' << pValue << endl;
311         }
312         out.close();
313                 
314         return 0;
315     }
316         catch(exception& e) {
317                 m->errorOut(e, "KruskalWallisCommand", "process");
318                 exit(1);
319         }
320 }
321 //**********************************************************************************************************************
322
323