]> git.donarmstrong.com Git - mothur.git/blob - kruskalwalliscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / kruskalwalliscommand.cpp
1 /* 
2  * File:   kruskalwalliscommand.cpp
3  * Author: kiverson
4  *
5  * Created on June 26, 2012, 11:06 AM
6  */
7
8 #include "kruskalwalliscommand.h"
9
10 //**********************************************************************************************************************
11 vector<string> KruskalWallisCommand::setParameters(){   
12         try {
13                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
14                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
15         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pgroups);
16         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);      
17                 
18                 vector<string> myArray;
19                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "KruskalWallisCommand", "setParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 string KruskalWallisCommand::getHelpString(){   
29         try {
30                 string helpString = "";
31                 helpString += "The kruskalwallis command parameter options are \n";
32         helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.";
33                 return helpString;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "KruskalWallisCommand", "getHelpString");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string KruskalWallisCommand::getOutputPattern(string type) {
42     try {
43         string pattern = "";
44         
45         if (type == "summary") {  pattern = "[filename],cooccurence.summary"; } 
46         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
47         
48         return pattern;
49     }
50     catch(exception& e) {
51         m->errorOut(e, "KruskalWallisCommand", "getOutputPattern");
52         exit(1);
53     }
54 }
55 //**********************************************************************************************************************
56 KruskalWallisCommand::KruskalWallisCommand(){   
57         try {
58                 abort = true; calledHelp = true; 
59                 setParameters();
60         vector<string> tempOutNames;
61                 outputTypes["summary"] = tempOutNames;
62
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 KruskalWallisCommand::KruskalWallisCommand(string option) {
71         try {
72                 abort = false; calledHelp = false;   
73                                 
74                 //allow user to run help
75                 if(option == "help") { help(); abort = true; calledHelp = true; }
76                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
77                 
78                 else {
79                         vector<string> myArray = setParameters();
80                         
81                         OptionParser parser(option);
82                         map<string,string> parameters = parser.getParameters();
83                         map<string,string>::iterator it;
84                         
85                         ValidParameters validParameter;
86                         
87                         //check to make sure all parameters are valid for command
88                         for (it = parameters.begin(); it != parameters.end(); it++) { 
89                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
90                         }
91             
92             //get shared file
93                         sharedfile = validParameter.validFile(parameters, "shared", true);
94                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
95                         else if (sharedfile == "not found") { 
96                                 //if there is a current shared file, use it
97                                 sharedfile = m->getSharedFile(); 
98                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
99                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
100                         }else { m->setSharedFile(sharedfile); }
101             
102             //if the user changes the output directory command factory will send this info to us in the output parameter 
103                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
104                     
105             groups = validParameter.validFile(parameters, "groups", false);   
106             if (groups == "not found") { groups = "";   }
107             else { 
108             m->splitAtDash(groups, Groups); 
109             }   
110             m->setGroups(Groups);
111                                 
112                         //if the user changes the input directory command factory will send this info to us in the output parameter 
113                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
114                         if (inputDir == "not found"){   inputDir = "";          }
115                         else {
116                                 string path;
117                                 it = parameters.find("shared");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
123                                 }
124                         }
125                 
126             vector<string> tempOutNames;
127             outputTypes["summary"] = tempOutNames;
128
129
130                 }
131
132         }
133         catch(exception& e) {
134                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
135                 exit(1);
136         }
137 }
138 //**********************************************************************************************************************
139 int KruskalWallisCommand::execute(){
140         try {
141                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
142         
143         InputData* input = new InputData(sharedfile, "sharedfile");
144         vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
145                 string lastLabel = lookup[0]->getLabel();
146         
147         
148                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
149                 set<string> processedLabels;
150                 set<string> userLabels = labels;
151
152         ofstream out;
153         map<string,string> variables;
154         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
155                 string outputFileName = getOutputFileName("summary",variables);
156         m->openOutputFile(outputFileName, out);
157         outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
158         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
159         out << "H\tpvalue\n";
160         
161         //math goes here
162         
163         int N = m->getNumGroups();
164         double H;
165         double tmp = 0.0;
166         vector<groupRank> vec;
167         vector<string> groups = m->getGroups();
168         string group;
169         int count;
170         double sum;
171                 
172         //merge all groups into a vector
173         
174         
175         
176         //rank function here
177         assignRank(vec);
178         
179         //populate counts and ranSums vectors
180         for (int i=0;i<N;i++) {
181             count = 0;
182             sum = 0;
183             group = groups[i];
184             for(int j;j<vec.size();j++) {
185                 if (vec[j].group == group) {
186                     count++;
187                     sum = sum + vec[j].rank;
188                 }
189             }
190             counts[i] = count;
191             rankSums[i] = sum;
192         }
193         
194         //test statistic
195         for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
196         
197         H = (12 / (N*(N+1))) * tmp - (3*(N+1));
198         
199         //ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
200         
201         //H = ss / ( (N * (N + 1))/12 );
202         
203         //correction for ties?
204         
205         //p-value calculation
206         
207                 return 0;
208         }
209         catch(exception& e) {
210                 m->errorOut(e, "KruskalWallisCommand", "execute");
211                 exit(1);
212         }
213 }
214 //**********************************************************************************************************************
215 void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
216     try {
217         double rank = 1;
218         double numRanks, avgRank, j;
219         vector<groupRank>::iterator it, oldit;
220
221         sort (vec.begin(), vec.end(), comparevalue);
222
223         it = vec.begin();
224
225         while ( it != vec.end() ) {
226             j = rank;
227             oldit = it;
228             if (!equalvalue(*it, *(it+1))) {
229                 (*it).rank = rank; 
230                 rank = rank+1; 
231                 it++; }
232             else {
233                 while(equalrank(*it, *(it+1))) {
234                     j = j + (j+1);
235                     rank++;
236                     it++;
237                 }
238                 numRanks = double (distance(oldit, it));
239                 avgRank = j / numRanks;
240                 while(oldit != it) {
241                     (*oldit).rank = avgRank;
242                     oldit++;
243                 }
244             }
245
246         }
247         
248
249     }
250     catch(exception& e) {
251                 m->errorOut(e, "KruskalWallisCommand", "getRank");
252                 exit(1);
253         }
254     
255 }
256 //**********************************************************************************************************************
257 void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
258     
259 }
260 //**********************************************************************************************************************
261 //**********************************************************************************************************************
262 //**********************************************************************************************************************
263