]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11         you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
12         paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
13         DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
14
15
16 #include "libshuffcommand.h"
17 #include "libshuff.h"
18 #include "slibshuff.h"
19 #include "dlibshuff.h"
20
21 //**********************************************************************************************************************
22 vector<string> LibShuffCommand::getValidParameters(){   
23         try {
24                 string Array[] =  {"iters","groups","step","form","cutoff","outputdir","inputdir"};
25                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "LibShuffCommand", "getValidParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 LibShuffCommand::LibShuffCommand(){     
35         try {
36                 //initialize outputTypes
37                 vector<string> tempOutNames;
38                 outputTypes["coverage"] = tempOutNames;
39                 outputTypes["libshuffsummary"] = tempOutNames;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "LibShuffCommand", "LibShuffCommand");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 vector<string> LibShuffCommand::getRequiredParameters(){        
48         try {
49                 vector<string> myArray;
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "LibShuffCommand", "getRequiredParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 vector<string> LibShuffCommand::getRequiredFiles(){     
59         try {
60                 string Array[] =  {"phylip","group"};
61                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
62                 return myArray;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "LibShuffCommand", "getRequiredFiles");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70
71 LibShuffCommand::LibShuffCommand(string option)  {
72         try {
73                 globaldata = GlobalData::getInstance();
74                 abort = false;
75                 Groups.clear();
76                 
77                 //allow user to run help
78                 if(option == "help") { help(); abort = true; }
79                 
80                 else {
81                         //valid paramters for this command
82                         string Array[] =  {"iters","groups","step","form","cutoff","outputdir","inputdir"};
83                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
84                         
85                         OptionParser parser(option);
86                         map<string, string> parameters = parser.getParameters();
87                         
88                         ValidParameters validParameter;
89                 
90                         //check to make sure all parameters are valid for command
91                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
92                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
93                         }
94                         
95                         //initialize outputTypes
96                         vector<string> tempOutNames;
97                         outputTypes["coverage"] = tempOutNames;
98                         outputTypes["libshuffsummary"] = tempOutNames;
99                         
100                         //if the user changes the output directory command factory will send this info to us in the output parameter 
101                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
102                                 outputDir = ""; 
103                                 outputDir += m->hasPath(globaldata->getPhylipFile()); //if user entered a file with a path then preserve it     
104                         }
105                         
106                         //make sure the user has already run the read.dist command
107                         if ((globaldata->gMatrix == NULL) || (globaldata->gGroupmap == NULL)) {
108                                 m->mothurOut("You must read in a matrix and groupfile using the read.dist command, before you use the libshuff command. "); m->mothurOutEndLine(); abort = true;; 
109                         }
110                                                 
111                         //check for optional parameter and set defaults
112                         // ...at some point should added some additional type checking...
113                         groups = validParameter.validFile(parameters, "groups", false);                 
114                         if (groups == "not found") { groups = ""; savegroups = groups; }
115                         else { 
116                                 savegroups = groups;
117                                 m->splitAtDash(groups, Groups);
118                                 globaldata->Groups = Groups;
119                         }
120                                 
121                         string temp;
122                         temp = validParameter.validFile(parameters, "iters", false);                            if (temp == "not found") { temp = "10000"; }
123                         convert(temp, iters); 
124                         
125                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "1.0"; }
126                         convert(temp, cutOff); 
127                         
128                         temp = validParameter.validFile(parameters, "step", false);                             if (temp == "not found") { temp = "0.01"; }
129                         convert(temp, step); 
130         
131                         userform = validParameter.validFile(parameters, "form", false);                 if (userform == "not found") { userform = "integral"; }
132                         
133                         if (abort == false) {
134                 
135                                 matrix = globaldata->gMatrix;                           //get the distance matrix
136                                 setGroups();                                                            //set the groups to be analyzed and sorts them
137         
138                                 /********************************************************************************************/
139                                 //this is needed because when we read the matrix we sort it into groups in alphabetical order
140                                 //the rest of the command and the classes used in this command assume specific order
141                                 /********************************************************************************************/
142                                 matrix->setGroups(globaldata->gGroupmap->namesOfGroups);
143                                 vector<int> sizes;
144                                 for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) {   sizes.push_back(globaldata->gGroupmap->getNumSeqs(globaldata->gGroupmap->namesOfGroups[i]));  }
145                                 matrix->setSizes(sizes);
146                         
147
148                                 if(userform == "discrete"){
149                                         form = new DLibshuff(matrix, iters, step, cutOff);
150                                 }
151                                 else{
152                                         form = new SLibshuff(matrix, iters, cutOff);
153                                 }
154                         }
155                         
156                 }
157                 
158         }
159         catch(exception& e) {
160                 m->errorOut(e, "LibShuffCommand", "LibShuffCommand");
161                 exit(1);
162         }
163 }
164 //**********************************************************************************************************************
165
166 void LibShuffCommand::help(){
167         try {
168                 m->mothurOut("The libshuff command can only be executed after a successful read.dist command including a groupfile.\n");
169                 m->mothurOut("The libshuff command parameters are groups, iters, step, form and cutoff.  No parameters are required.\n");
170                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
171                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random matrices you would like compared to your matrix.\n");
172                 m->mothurOut("The step parameter allows you to specify change in distance you would like between each output if you are using the discrete form.\n");
173                 m->mothurOut("The form parameter allows you to specify if you would like to analyze your matrix using the discrete or integral form. Your options are integral or discrete.\n");
174                 m->mothurOut("The libshuff command should be in the following format: libshuff(groups=yourGroups, iters=yourIters, cutOff=yourCutOff, form=yourForm, step=yourStep).\n");
175                 m->mothurOut("Example libshuff(groups=A-B-C, iters=500, form=discrete, step=0.01, cutOff=2.0).\n");
176                 m->mothurOut("The default value for groups is all the groups in your groupfile, iters is 10000, cutoff is 1.0, form is integral and step is 0.01.\n");
177                 m->mothurOut("The libshuff command output two files: .coverage and .slsummary their descriptions are in the manual.\n");
178                 m->mothurOut("Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e.yourIters).\n\n");
179         }
180         catch(exception& e) {
181                 m->errorOut(e, "LibShuffCommand", "help");
182                 exit(1);
183         }
184 }
185
186 //**********************************************************************************************************************
187
188 int LibShuffCommand::execute(){
189         try {
190                 
191                 if (abort == true) {    return 0;       }
192         
193                 savedDXYValues = form->evaluateAll();
194                 savedMinValues = form->getSavedMins();
195                 
196                 if (m->control_pressed) {  delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; return 0; }
197         
198                 pValueCounts.resize(numGroups);
199                 for(int i=0;i<numGroups;i++){
200                         pValueCounts[i].assign(numGroups, 0);
201                 }
202         
203                 if (m->control_pressed) {  outputTypes.clear(); delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; return 0; }
204                                 
205                 Progress* reading = new Progress();
206                 
207                 for(int i=0;i<numGroups-1;i++) {
208                         for(int j=i+1;j<numGroups;j++) {
209                                 
210                                 if (m->control_pressed) {  outputTypes.clear();  delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; delete reading; return 0; }
211
212                                 reading->newLine(groupNames[i]+'-'+groupNames[j], iters);
213                                 int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
214                                 int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
215         
216                                 for(int p=0;p<iters;p++) {      
217                                         
218                                         if (m->control_pressed) {  outputTypes.clear(); delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; delete reading; return 0; }
219                                         
220                                         form->randomizeGroups(spoti,spotj); 
221                                         if(form->evaluatePair(spoti,spotj) >= savedDXYValues[spoti][spotj])     {       pValueCounts[i][j]++;   }
222                                         if(form->evaluatePair(spotj,spoti) >= savedDXYValues[spotj][spoti])     {       pValueCounts[j][i]++;   }
223                                         
224                                         if (m->control_pressed) {  outputTypes.clear(); delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; delete reading; return 0; }
225                                         
226                                         reading->update(p);                     
227                                 }
228                                 form->resetGroup(spoti);
229                                 form->resetGroup(spotj);
230                         }
231                 }
232                 
233                 if (m->control_pressed) { outputTypes.clear();  delete form; globaldata->Groups.clear(); delete globaldata->gMatrix;  globaldata->gMatrix = NULL; delete reading; return 0; }
234         
235                 reading->finish();
236                 delete reading;
237
238                 m->mothurOutEndLine();
239                 printSummaryFile();
240                 printCoverageFile();
241                                 
242                 //clear out users groups
243                 globaldata->Groups.clear();
244                 delete form;
245                 
246                 //delete globaldata's copy of the gmatrix to free up memory
247                 delete globaldata->gMatrix;  globaldata->gMatrix = NULL;
248                 
249                 if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
250
251                 
252                 m->mothurOutEndLine();
253                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
254                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
255                 m->mothurOutEndLine();
256                 
257                 return 0;
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "LibShuffCommand", "execute");
261                 exit(1);
262         }
263 }
264
265 //**********************************************************************************************************************
266
267 int LibShuffCommand::printCoverageFile() {
268         try {
269
270                 ofstream outCov;
271                 summaryFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getPhylipFile())) + "libshuff.coverage";
272                 m->openOutputFile(summaryFile, outCov);
273                 outputNames.push_back(summaryFile); outputTypes["coverage"].push_back(summaryFile);
274                 outCov.setf(ios::fixed, ios::floatfield); outCov.setf(ios::showpoint);
275                 //cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
276                 
277                 map<double,vector<int> > allDistances;
278                 map<double,vector<int> >::iterator it;
279
280                 vector<vector<int> > indices(numGroups);
281                 int numIndices = numGroups * numGroups;
282                 
283                 int index = 0;
284                 for(int i=0;i<numGroups;i++){
285                         indices[i].assign(numGroups,0);
286                         for(int j=0;j<numGroups;j++){
287                                 indices[i][j] = index++;
288                                 
289                                 int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
290                                 int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
291                                 
292                                 for(int k=0;k<savedMinValues[spoti][spotj].size();k++){
293                                         
294                                         if(m->control_pressed)  { outCov.close(); return 0; }
295                                         
296                                         if(allDistances[savedMinValues[spoti][spotj][k]].size() != 0){
297                                                 allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]]++;
298                                         }
299                                         else{
300                                                 allDistances[savedMinValues[spoti][spotj][k]].assign(numIndices, 0);
301                                                 allDistances[savedMinValues[spoti][spotj][k]][indices[i][j]] = 1;
302                                         }
303                                 }
304                         }
305                 }
306                 it=allDistances.begin();
307                 
308                 //cout << setprecision(8);
309
310                 vector<int> prevRow = it->second;
311                 it++;
312                 
313                 for(;it!=allDistances.end();it++){
314                         for(int i=0;i<it->second.size();i++){
315                                 it->second[i] += prevRow[i];
316                         }
317                         prevRow = it->second;
318                 }
319                 
320                 vector<int> lastRow = allDistances.rbegin()->second;
321                 outCov << setprecision(8);
322                 
323                 outCov << "dist";
324                 for (int i = 0; i < numGroups; i++){
325                         outCov << '\t' << groupNames[i];
326                 }
327                 for (int i=0;i<numGroups;i++){
328                         for(int j=i+1;j<numGroups;j++){
329                                 if(m->control_pressed)  { outCov.close(); return 0; }
330                                 outCov << '\t' << groupNames[i] << '-' << groupNames[j] << '\t';
331                                 outCov << groupNames[j] << '-' << groupNames[i];
332                         }
333                 }
334                 outCov << endl;
335                 
336                 for(it=allDistances.begin();it!=allDistances.end();it++){
337                         outCov << it->first << '\t';
338                         for(int i=0;i<numGroups;i++){
339                                 outCov << it->second[indices[i][i]]/(float)lastRow[indices[i][i]] << '\t';
340                         }
341                         for(int i=0;i<numGroups;i++){
342                                 for(int j=i+1;j<numGroups;j++){
343                                         if(m->control_pressed)  { outCov.close(); return 0; }
344                                         
345                                         outCov << it->second[indices[i][j]]/(float)lastRow[indices[i][j]] << '\t';
346                                         outCov << it->second[indices[j][i]]/(float)lastRow[indices[j][i]] << '\t';
347                                 }
348                         }
349                         outCov << endl;
350                 }
351                 outCov.close();
352                 
353                 return 0;
354         }
355         catch(exception& e) {
356                 m->errorOut(e, "LibShuffCommand", "printCoverageFile");
357                 exit(1);
358         }
359
360
361 //**********************************************************************************************************************
362
363 int LibShuffCommand::printSummaryFile() {
364         try {
365
366                 ofstream outSum;
367                 summaryFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getPhylipFile())) + "libshuff.summary";
368                 m->openOutputFile(summaryFile, outSum);
369                 outputNames.push_back(summaryFile); outputTypes["libshuffsummary"].push_back(summaryFile);
370
371                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
372                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
373                 
374                 cout << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl;
375                 m->mothurOutJustToLog("Comparison\tdCXYScore\tSignificance"); m->mothurOutEndLine();
376                 outSum << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl;
377         
378                 int precision = (int)log10(iters);
379                 for(int i=0;i<numGroups;i++){
380                         for(int j=i+1;j<numGroups;j++){
381                                 if(m->control_pressed)  { outSum.close(); return 0; }
382                                 
383                                 int spoti = globaldata->gGroupmap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix
384                                 int spotj = globaldata->gGroupmap->groupIndex[groupNames[j]];
385                                 
386                                 if(pValueCounts[i][j]){
387                                         cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
388                                         m->mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((pValueCounts[i][j]/(float)iters))); m->mothurOutEndLine();
389                                         outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
390                                 }
391                                 else{
392                                         cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
393                                         m->mothurOutJustToLog(groupNames[i]+"-"+groupNames[j] + "\t" + toString(savedDXYValues[spoti][spotj]) + "\t" + toString((1/(float)iters))); m->mothurOutEndLine();
394                                         outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[spoti][spotj] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
395                                 }
396                                 if(pValueCounts[j][i]){
397                                         cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
398                                         m->mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((pValueCounts[j][i]/(float)iters))); m->mothurOutEndLine();
399                                         outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
400                                 }
401                                 else{
402                                         cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
403                                         m->mothurOutJustToLog(groupNames[j]+"-"+groupNames[i] + "\t" + toString(savedDXYValues[spotj][spoti]) + "\t" + toString((1/(float)iters))); m->mothurOutEndLine();
404                                         outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[spotj][spoti] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
405                                 }
406                         }
407                 }
408                 
409                 outSum.close();
410                 return 0;
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "LibShuffCommand", "printSummaryFile");
414                 exit(1);
415         }
416
417
418 //**********************************************************************************************************************
419
420 void LibShuffCommand::setGroups() {
421         try {
422                 //if the user has not entered specific groups to analyze then do them all
423                 if (globaldata->Groups.size() == 0) {
424                         numGroups = globaldata->gGroupmap->getNumGroups();
425                         for (int i=0; i < numGroups; i++) { 
426                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
427                         }
428                 } else {
429                         if (savegroups != "all") {
430                                 //check that groups are valid
431                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
432                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
433                                                 m->mothurOut(globaldata->Groups[i] + " is not a valid group, and will be disregarded."); m->mothurOutEndLine();
434                                                 // erase the invalid group from globaldata->Groups
435                                                 globaldata->Groups.erase(globaldata->Groups.begin()+i);
436                                         }
437                                 }
438                         
439                                 //if the user only entered invalid groups
440                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
441                                         numGroups = globaldata->gGroupmap->getNumGroups();
442                                         for (int i=0; i < numGroups; i++) { 
443                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
444                                         }
445                                         m->mothurOut("When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile."); m->mothurOutEndLine();
446                                 } else { numGroups = globaldata->Groups.size(); }
447                         } else { //users wants all groups
448                                 numGroups = globaldata->gGroupmap->getNumGroups();
449                                 globaldata->Groups.clear();
450                                 for (int i=0; i < numGroups; i++) { 
451                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
452                                 }
453                         }
454                 }
455
456                 //sort so labels match
457                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
458                 
459                 //sort
460                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
461                 
462                 for (int i = 0; i < globaldata->gGroupmap->namesOfGroups.size(); i++) {  globaldata->gGroupmap->groupIndex[globaldata->gGroupmap->namesOfGroups[i]] = i;  }
463
464                 groupNames = globaldata->Groups;
465
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "LibShuffCommand", "setGroups");
469                 exit(1);
470         }
471 }
472
473 /***********************************************************/