]> git.donarmstrong.com Git - mothur.git/blob - kruskalwalliscommand.cpp
merged mgcluster. kw files added, but not working in this commit!
[mothur.git] / kruskalwalliscommand.cpp
1 /* 
2  * File:   kruskalwalliscommand.cpp
3  * Author: kiverson
4  *
5  * Created on June 26, 2012, 11:06 AM
6  */
7
8 #include "kruskalwalliscommand.h"
9
10 //**********************************************************************************************************************
11 vector<string> KruskalWallisCommand::setParameters(){   
12         try {
13                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
14                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
15         CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
16         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);     
17                 
18                 vector<string> myArray;
19                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "KruskalWallisCommand", "setParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 string KruskalWallisCommand::getHelpString(){   
29         try {
30                 string helpString = "";
31                 helpString += "The kruskalwallis command parameter options are \n";
32         helpString += "Kruskal–Wallis one-way analysis of variance is a non-parametric method for testing whether samples originate from the same distribution.";
33                 return helpString;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "KruskalWallisCommand", "getHelpString");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string KruskalWallisCommand::getOutputFileNameTag(string type, string inputName=""){    
42         try {
43         string outputFileName = "";
44                 map<string, vector<string> >::iterator it;
45         
46         //is this a type this command creates
47         it = outputTypes.find(type);
48         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
49         else {
50             if (type == "summary") {  outputFileName =  "cooccurence.summary"; }
51             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
52         }
53         return outputFileName;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "KruskalWallisCommand", "getOutputFileNameTag");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 KruskalWallisCommand::KruskalWallisCommand(){   
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65         vector<string> tempOutNames;
66                 outputTypes["summary"] = tempOutNames;
67
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 KruskalWallisCommand::KruskalWallisCommand(string option) {
76         try {
77                 abort = false; calledHelp = false;   
78                                 
79                 //allow user to run help
80                 if(option == "help") { help(); abort = true; calledHelp = true; }
81                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
82                 
83                 else {
84                         vector<string> myArray = setParameters();
85                         
86                         OptionParser parser(option);
87                         map<string,string> parameters = parser.getParameters();
88                         map<string,string>::iterator it;
89                         
90                         ValidParameters validParameter;
91                         
92                         //check to make sure all parameters are valid for command
93                         for (it = parameters.begin(); it != parameters.end(); it++) { 
94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
95                         }
96             
97             //get shared file
98                         sharedfile = validParameter.validFile(parameters, "shared", true);
99                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
100                         else if (sharedfile == "not found") { 
101                                 //if there is a current shared file, use it
102                                 sharedfile = m->getSharedFile(); 
103                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
104                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
105                         }else { m->setSharedFile(sharedfile); }
106             
107             //if the user changes the output directory command factory will send this info to us in the output parameter 
108                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
109                     
110             groups = validParameter.validFile(parameters, "groups", false);   
111             if (groups == "not found") { groups = "";   }
112             else { 
113             m->splitAtDash(groups, Groups); 
114             }   
115             m->setGroups(Groups);
116                                 
117                         //if the user changes the input directory command factory will send this info to us in the output parameter 
118                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
119                         if (inputDir == "not found"){   inputDir = "";          }
120                         else {
121                                 string path;
122                                 it = parameters.find("shared");
123                                 //user has given a template file
124                                 if(it != parameters.end()){ 
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
128                                 }
129                         }
130                 
131             vector<string> tempOutNames;
132             outputTypes["summary"] = tempOutNames;
133
134
135                 }
136
137         }
138         catch(exception& e) {
139                 m->errorOut(e, "KruskalWallisCommand", "KruskalWallisCommand");
140                 exit(1);
141         }
142 }
143 //**********************************************************************************************************************
144 int KruskalWallisCommand::execute(){
145         try {
146                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
147         
148         InputData* input = new InputData(sharedfile, "sharedfile");
149         vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
150                 string lastLabel = lookup[0]->getLabel();
151         
152         
153                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
154                 set<string> processedLabels;
155                 set<string> userLabels = labels;
156
157         ofstream out;
158                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("summary");
159         m->openOutputFile(outputFileName, out);
160         outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
161         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
162         out << "H\tpvalue\n";
163         
164         //math goes here
165         
166         int N = m->getNumGroups();
167         double H;
168         double tmp = 0.0;
169         vector<groupRank> vec;
170         vector<string> groups = m->getGroups();
171         string group;
172         int count;
173         double sum;
174                 
175         //merge all groups into a vector
176         
177         
178         
179         //rank function here
180         assignRank(vec);
181         
182         //populate counts and ranSums vectors
183         for (int i=0;i<N;i++) {
184             count = 0;
185             sum = 0;
186             group = groups[i];
187             for(int j;j<vec.size();j++) {
188                 if (vec[j].group == group) {
189                     count++;
190                     sum = sum + vec[j].rank;
191                 }
192             }
193             counts[i] = count;
194             rankSums[i] = sum;
195         }
196         
197         //test statistic
198         for (int i=0;i<N;i++) { tmp = tmp + (pow(rankSums[i],2) / counts[i]); }
199         
200         H = (12 / (N*(N+1))) * tmp - (3*(N+1));
201         
202         //ss = tmp - pow(accumulate(rankSums.begin(), rankSums.end(), 0), 2);
203         
204         //H = ss / ( (N * (N + 1))/12 );
205         
206         //correction for ties?
207         
208         //p-value calculation
209         
210                 return 0;
211         }
212         catch(exception& e) {
213                 m->errorOut(e, "KruskalWallisCommand", "execute");
214                 exit(1);
215         }
216 }
217 //**********************************************************************************************************************
218 void KruskalWallisCommand::assignRank(vector<groupRank> &vec) {
219     try {
220         double rank = 1;
221         double numRanks, avgRank, j;
222         vector<groupRank>::iterator it, oldit;
223
224         sort (vec.begin(), vec.end(), comparevalue);
225
226         it = vec.begin();
227
228         while ( it != vec.end() ) {
229             j = rank;
230             oldit = it;
231             if (!equalvalue(*it, *(it+1))) {
232                 (*it).rank = rank; 
233                 rank = rank+1; 
234                 it++; }
235             else {
236                 while(equalrank(*it, *(it+1))) {
237                     j = j + (j+1);
238                     rank++;
239                     it++;
240                 }
241                 numRanks = double (distance(oldit, it));
242                 avgRank = j / numRanks;
243                 while(oldit != it) {
244                     (*oldit).rank = avgRank;
245                     oldit++;
246                 }
247             }
248
249         }
250         
251
252     }
253     catch(exception& e) {
254                 m->errorOut(e, "KruskalWallisCommand", "getRank");
255                 exit(1);
256         }
257     
258 }
259 //**********************************************************************************************************************
260 void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
261     
262 }
263 //**********************************************************************************************************************
264 //**********************************************************************************************************************
265 //**********************************************************************************************************************