]> git.donarmstrong.com Git - mothur.git/blob - cooccurrencecommand.cpp
added cooccurence command
[mothur.git] / cooccurrencecommand.cpp
1 /*
2  *  cooccurrencecommand.cpp
3  *  Mothur
4  *
5  *  Created by kiverson on 1/2/12.
6  *  Copyright 2012 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "cooccurrencecommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> CooccurrenceCommand::setParameters() {   
14         try { 
15                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);             
16                 CommandParameter pmetric("metric", "Multiple", "cscore-checker-combo-vratio", "cscore", "", "", "",false,false); parameters.push_back(pmetric);
17                 CommandParameter pmatrix("matrixmodel", "Multiple", "sim1-sim2-sim3-sim4-sim5-sim6-sim7-sim8-sim9", "sim2", "", "", "",false,false); parameters.push_back(pmatrix);
18         CommandParameter pruns("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pruns);
19                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
20                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22         CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
23
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "CooccurrenceCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string CooccurrenceCommand::getHelpString(){    
35         try {
36                 string helpString = "help!";
37
38                 return helpString;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "CooccurrenceCommand", "getHelpString");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 CooccurrenceCommand::CooccurrenceCommand(){     
47         try {
48                 abort = true; calledHelp = true; 
49                 setParameters();
50         vector<string> tempOutNames;
51                 outputTypes["summary"] = tempOutNames;
52
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 CooccurrenceCommand::CooccurrenceCommand(string option) {
61         try {
62                 abort = false; calledHelp = false;   
63                 allLines = 1;
64                                 
65                 //allow user to run help
66                 if(option == "help") { help(); abort = true; calledHelp = true; }
67                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
68                 
69                 else {
70                         vector<string> myArray = setParameters();
71                         
72                         OptionParser parser(option);
73                         map<string,string> parameters = parser.getParameters();
74                         map<string,string>::iterator it;
75                         
76                         ValidParameters validParameter;
77                         
78                         //check to make sure all parameters are valid for command
79                         for (it = parameters.begin(); it != parameters.end(); it++) { 
80                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
81                         }
82
83                         
84                         //if the user changes the input directory command factory will send this info to us in the output parameter 
85                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
86                         if (inputDir == "not found"){   inputDir = "";          }
87                         else {
88                                 string path;
89                                 it = parameters.find("shared");
90                                 //user has given a template file
91                                 if(it != parameters.end()){ 
92                                         path = m->hasPath(it->second);
93                                         //if the user has not given a path then, add inputdir. else leave path alone.
94                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
95                                 }
96                         }
97                 
98             vector<string> tempOutNames;
99             outputTypes["summary"] = tempOutNames;
100                 
101                 //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         label = validParameter.validFile(parameters, "label", false);                   
104                         if (label == "not found") { label = ""; }
105                         else { 
106                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
107                                 else { allLines = 1;  }
108                         }
109                         
110                         //get shared file
111                         sharedfile = validParameter.validFile(parameters, "shared", true);
112                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
113                         else if (sharedfile == "not found") { 
114                                 //if there is a current shared file, use it
115                                 sharedfile = m->getSharedFile(); 
116                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
117                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
118                         }else { m->setSharedFile(sharedfile); }
119                         
120                         
121                         //if the user changes the output directory command factory will send this info to us in the output parameter 
122                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
123
124                         
125                         metric = validParameter.validFile(parameters, "metric", false);                         if (metric == "not found") { metric = "cscore"; }
126                         
127                         if ((metric != "cscore") && (metric != "checker") && (metric != "combo") && (metric != "vratio")) {
128                                 m->mothurOut("[ERROR]: " + metric + " is not a valid metric option for the cooccurrence command. Choices are cscore, checker, combo, vratio."); m->mothurOutEndLine(); abort = true; 
129                         }
130                         
131                         matrix = validParameter.validFile(parameters, "matrix", false);                         if (matrix == "not found") { matrix = "sim2"; }
132                         
133                         if ((matrix != "sim1") && (matrix != "sim2") && (matrix != "sim3") && (matrix != "sim4") && (matrix != "sim5" ) && (matrix != "sim6" ) && (matrix != "sim7" ) && (matrix != "sim8" ) && (matrix != "sim9" )) {
134                                 m->mothurOut("[ERROR]: " + matrix + " is not a valid matrix option for the cooccurrence command. Choices are sim1, sim2, sim3, sim4, sim5, sim6, sim7, sim8, sim9."); m->mothurOutEndLine(); abort = true; 
135                         }
136             
137             groups = validParameter.validFile(parameters, "groups", false);                     
138                         if (groups == "not found") { groups = "";   }
139                         else { 
140                                 m->splitAtDash(groups, Groups); 
141                         }                       
142                         m->setGroups(Groups);
143             
144             string temp = validParameter.validFile(parameters, "iters", false);                 if (temp == "not found") { temp = "1000"; }
145                         m->mothurConvert(temp, runs); 
146
147                 }
148
149         }
150         catch(exception& e) {
151                 m->errorOut(e, "CooccurrenceCommand", "CooccurrenceCommand");
152                 exit(1);
153         }
154 }
155 //**********************************************************************************************************************
156
157 int CooccurrenceCommand::execute(){
158         try {
159         
160                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
161                 
162                 InputData* input = new InputData(sharedfile, "sharedfile");
163                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
164                 string lastLabel = lookup[0]->getLabel();
165                 
166                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
167                 set<string> processedLabels;
168                 set<string> userLabels = labels;
169
170         ofstream out;
171                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "cooccurence.summary";
172         m->openOutputFile(outputFileName, out);
173         outputNames.push_back(outputFileName);  outputTypes["summary"].push_back(outputFileName);
174         out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
175         out << "metric\tlabel\tScore\tpValue\n";
176
177                 //as long as you are not at the end of the file or done wih the lines you want
178                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
179                         
180                         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
181         
182                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
183
184                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
185                                 
186                                 getCooccurrence(lookup, out);
187                                 
188                                 processedLabels.insert(lookup[0]->getLabel());
189                                 userLabels.erase(lookup[0]->getLabel());
190                         }
191                         
192                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
193                                 string saveLabel = lookup[0]->getLabel();
194                         
195                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
196                                 lookup = input->getSharedRAbundVectors(lastLabel);
197                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
198                                 getCooccurrence(lookup, out);
199                                 
200                                 processedLabels.insert(lookup[0]->getLabel());
201                                 userLabels.erase(lookup[0]->getLabel());
202                                 
203                                 //restore real lastlabel to save below
204                                 lookup[0]->setLabel(saveLabel);
205                         }
206                         
207                         lastLabel = lookup[0]->getLabel();
208                         //prevent memory leak
209                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
210                         
211                         if (m->control_pressed) {  outputTypes.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
212
213                         //get next line to process
214                         lookup = input->getSharedRAbundVectors();                               
215                 }
216                 
217                 if (m->control_pressed) { delete input; out.close(); m->mothurRemove(outputFileName); return 0; }
218
219                 //output error messages about any remaining user labels
220                 set<string>::iterator it;
221                 bool needToRun = false;
222                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
223                         m->mothurOut("Your file does not include the label " + *it); 
224                         if (processedLabels.count(lastLabel) != 1) {
225                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
226                                 needToRun = true;
227                         }else {
228                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
229                         }
230                 }
231         
232                 //run last label if you need to
233                 if (needToRun == true)  {
234                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
235                         lookup = input->getSharedRAbundVectors(lastLabel);
236                         
237                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
238                         
239                         getCooccurrence(lookup, out);
240                         
241                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
242                 }
243         
244         out.close(); 
245         
246                 //reset groups parameter 
247                 delete input; 
248
249         m->mothurOutEndLine();
250                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
251                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
252                 m->mothurOutEndLine();
253         
254                 return 0;
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "CooccurrenceCommand", "execute");
258                 exit(1);
259         }
260 }
261 //**********************************************************************************************************************
262
263 int CooccurrenceCommand::getCooccurrence(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
264         try {
265         int numOTUS = thisLookUp[0]->getNumBins();
266         vector< vector<int> > initmatrix (thisLookUp.size());
267         vector< vector<int> > co_matrix (thisLookUp[0]->getNumBins());
268         for (int i = 0; i < thisLookUp[0]->getNumBins(); i++) { co_matrix[i].resize((thisLookUp.size()), 0); }
269         for (int i = 0; i < thisLookUp.size(); i++) { initmatrix[i].resize((thisLookUp[i]->getNumBins()), 0); }
270         vector<int> columntotal(thisLookUp.size(), 0);
271         vector<int> rowtotal(numOTUS, 0);
272         
273         int rowcount = 0;
274         for (int i = 0; i < thisLookUp.size(); i++) {
275                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
276                                 if (m->control_pressed) { return 0; }                   
277                                 int abund = thisLookUp[i]->getAbundance(j);
278                                 
279                                 if(abund > 0) {
280                                     initmatrix[i][j] = 1;
281                     co_matrix[j][i] = 1;
282                     rowcount++;
283                     columntotal[j]++;
284                                 }
285                         }
286             rowtotal[i] = rowcount;
287             rowcount = 0;
288         }
289         
290         //nrows is ncols of inital matrix. All the functions need this value. They assume the transposition has already taken place and nrows and ncols refer to that matrix.
291         //comatrix and initmatrix are still vectors of vectors of ints as in the original script. The abundancevector is only what was read in ie not a co-occurrence matrix!
292         int ncols = numOTUS;//rows of inital matrix
293         int nrows = thisLookUp.size();//OTUs
294         double initscore = 0.0;
295         //transpose matrix
296         int newmatrows = ncols;
297         int newmatcols = nrows;
298       
299         //swap for transposed matrix
300         nrows = newmatrows;//ncols;
301         ncols = newmatcols;//nrows;
302         
303         vector<int> initcolumntotal(ncols, 0);
304         vector<int> initrowtotal(nrows, 0);
305         vector<double> stats;
306                
307         TrialSwap2 trial;
308         
309         initcolumntotal = rowtotal;
310         initrowtotal = columntotal;
311         trial.update_row_col_totals(co_matrix, rowtotal, columntotal);
312         
313         if (metric == "cscore")         { initscore = trial.calc_c_score(co_matrix, rowtotal);    }
314         else if (metric == "checker")   { initscore = trial.calc_checker(co_matrix, rowtotal);    }
315         else if (metric == "vratio")    { initscore = trial.calc_vratio(rowtotal, columntotal);   }
316         else if (metric == "combo")     { initscore = trial.calc_combo(co_matrix);                }
317         else                            {  m->mothurOut("[ERROR]: No metric selected!\n");  m->control_pressed = true; return 1;            }
318         
319         m->mothurOut("Initial c score: " + toString(initscore)); m->mothurOutEndLine();
320         
321         //nullmatrix burn in
322         for(int i=0;i<10000;i++) {
323             if (m->control_pressed) { return 0; }
324             if (matrix == "sim1") {
325                 trial.sim1(co_matrix);
326             }else if (matrix == "sim2") {
327                 trial.sim2(co_matrix);
328             }else if (matrix == "sim3") {
329                 trial.sim3(initmatrix);
330                 co_matrix = initmatrix;
331             }else if (matrix == "sim4") {
332                 trial.sim4(columntotal, rowtotal, co_matrix);
333             }else if (matrix == "sim5") {
334                 trial.sim5(initcolumntotal, initrowtotal, initmatrix);
335                 trial.transpose_matrix(initmatrix,co_matrix);
336             }else if (matrix == "sim6") {
337                 trial.sim6(columntotal, co_matrix);
338             }else if (matrix == "sim7") {
339                 trial.sim7(initcolumntotal, initmatrix);          
340                 co_matrix = initmatrix;
341             }else if (matrix == "sim8") {
342                 trial.sim8(columntotal, rowtotal, co_matrix);
343             }else if (matrix == "sim9") {
344                 trial.swap_checkerboards (co_matrix);
345             }else{
346                 m->mothurOut("[ERROR]: No model selected! \n");
347                 m->control_pressed = true;
348             }
349         }
350                 
351         //run
352         for(int i=0;i<runs;i++) {
353             if (m->control_pressed) { return 0; }
354             //calc metric of nullmatrix
355             if (matrix == "sim1") {
356                 trial.sim1(co_matrix);
357             }else if (matrix == "sim2") {
358                 trial.sim2(co_matrix);
359             }else if (matrix == "sim3") {
360                 trial.sim3(initmatrix);
361                 co_matrix = initmatrix;
362             }else if (matrix == "sim4") {
363                 trial.sim4(columntotal, rowtotal, co_matrix);
364             }else if (matrix == "sim5") {
365                 trial.sim5(initcolumntotal, initrowtotal, initmatrix);
366                 trial.transpose_matrix(initmatrix,co_matrix);
367             }else if (matrix == "sim6") {
368                 trial.sim6(columntotal, co_matrix);
369             }else if (matrix == "sim7") {
370                 trial.sim7(initcolumntotal, initmatrix);          
371                 co_matrix = initmatrix;
372             }else if (matrix == "sim8") {
373                 trial.sim8(columntotal, rowtotal, co_matrix);
374             }else if (matrix == "sim9") {
375                 trial.swap_checkerboards (co_matrix);
376             }else{
377                  m->mothurOut("[ERROR]: No model selected! \n");
378                  m->control_pressed = true;
379             }
380             //
381             //            
382             trial.update_row_col_totals(co_matrix, rowtotal, columntotal); 
383             
384             if (metric == "cscore") { 
385                 stats.push_back(trial.calc_c_score(co_matrix, rowtotal));
386             }else if (metric == "checker") { 
387                 stats.push_back(trial.calc_checker(co_matrix, rowtotal));
388             }else if (metric == "vratio") { 
389                 stats.push_back(trial.calc_vratio(rowtotal, columntotal));
390             }else if (metric == "combo") { 
391                 stats.push_back(trial.calc_combo(co_matrix));
392             }else {
393                 m->mothurOut("[ERROR]: No metric selected!\n");
394                 m->control_pressed = true;
395                 return 1;
396             }
397             
398         }
399
400         double total = 0.0;
401         for (int i=0; i<stats.size();i++)   {   total+=stats[i];   }
402         
403         double nullMean = double (total/(double)stats.size()); 
404         
405         m->mothurOutEndLine(); m->mothurOut("average metric score: " + toString(nullMean)); m->mothurOutEndLine();
406         
407         double pvalue = 0.0;
408         if (metric == "cscore" || metric == "checker") {    pvalue = trial.calc_pvalue_greaterthan (stats, initscore);   }
409         else{   pvalue = trial.calc_pvalue_lessthan (stats, initscore); }
410         
411         m->mothurOut("pvalue: " + toString(pvalue)); m->mothurOutEndLine();
412         out << metric << '\t' << thisLookUp[0]->getLabel() << '\t' << nullMean << '\t' << pvalue << endl;
413         
414         return 0;
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "CooccurrenceCommand", "Cooccurrence");
418                 exit(1);
419         }
420 }
421 //**********************************************************************************************************************
422
423