]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
pds fixes of heatmap and some other minor stuff
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11         you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
12         paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
13         DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
14
15
16 #include "libshuffcommand.h"
17 #include "libshuff.h"
18 #include "slibshuff.h"
19 #include "dlibshuff.h"
20
21 //**********************************************************************************************************************
22
23 LibShuffCommand::LibShuffCommand(){
24         try {
25                 srand( (unsigned)time( NULL ) );
26                 
27                 globaldata = GlobalData::getInstance();
28                 convert(globaldata->getCutOff(), cutOff);       //get the cutoff
29                 convert(globaldata->getIters(), iters);         //get the number of iterations
30                 convert(globaldata->getStep(), step);           //get the step size for the discrete command
31                 matrix = globaldata->gMatrix;                           //get the distance matrix
32                 setGroups();                                                            //set the groups to be analyzed
33
34                 if(globaldata->getForm() == "discrete"){
35                         form = new DLibshuff(matrix, iters, step, cutOff);
36                 }
37                 else{
38                         form = new SLibshuff(matrix, iters, cutOff);
39                 }
40         }
41         catch(exception& e) {
42                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
43                 exit(1);
44         }
45         catch(...) {
46                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
47                 exit(1);
48         }       
49                         
50 }
51
52 //**********************************************************************************************************************
53
54 int LibShuffCommand::execute(){
55         try {
56
57                 savedDXYValues = form->evaluateAll();
58                 savedMinValues = form->getSavedMins();
59                 
60                 pValueCounts.resize(numGroups);
61                 for(int i=0;i<numGroups;i++){
62                         pValueCounts[i].assign(numGroups, 0);
63                 }
64                 
65                 Progress* reading = new Progress();
66                 
67                 for(int i=0;i<numGroups-1;i++) {
68                         for(int j=i+1;j<numGroups;j++) {
69                                 reading->newLine(groupNames[i]+'-'+groupNames[j], iters);
70                                 for(int p=0;p<iters;p++) {              
71                                         form->randomizeGroups(i,j);
72                                         if(form->evaluatePair(i,j) >= savedDXYValues[i][j])     {       pValueCounts[i][j]++;   }
73                                         if(form->evaluatePair(j,i) >= savedDXYValues[j][i])     {       pValueCounts[j][i]++;   }
74                                         reading->update(p);                     
75                                 }
76                                 form->resetGroup(i);
77                                 form->resetGroup(j);
78                         }
79                 }
80                 reading->finish();
81                 delete reading;
82
83                 cout << endl;
84                 printSummaryFile();
85                 printCoverageFile();
86                 
87                 //clear out users groups
88                 globaldata->Groups.clear();
89                 delete form;
90                 
91                 return 0;
92         }
93         catch(exception& e) {
94                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
95                 exit(1);
96         }
97         catch(...) {
98                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99                 exit(1);
100         }       
101 }
102
103 //**********************************************************************************************************************
104
105 void LibShuffCommand::printCoverageFile() {
106         try {
107
108                 ofstream outCov;
109                 summaryFile = getRootName(globaldata->getPhylipFile()) + "libshuff.coverage";
110                 openOutputFile(summaryFile, outCov);
111                 outCov.setf(ios::fixed, ios::floatfield); outCov.setf(ios::showpoint);
112                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
113                 
114                 map<double,vector<int> > allDistances;
115                 map<double,vector<int> >::iterator it;
116
117                 vector<vector<int> > indices(numGroups);
118                 int numIndices = numGroups * numGroups;
119                 
120                 int index = 0;
121                 for(int i=0;i<numGroups;i++){
122                         indices[i].assign(numGroups,0);
123                         for(int j=0;j<numGroups;j++){
124                                 indices[i][j] = index++;
125                                 for(int k=0;k<savedMinValues[i][j].size();k++){
126                                         if(allDistances[savedMinValues[i][j][k]].size() != 0){
127                                                 allDistances[savedMinValues[i][j][k]][indices[i][j]]++;
128                                         }
129                                         else{
130                                                 allDistances[savedMinValues[i][j][k]].assign(numIndices, 0);
131                                                 allDistances[savedMinValues[i][j][k]][indices[i][j]] = 1;
132                                         }
133                                 }
134                         }
135                 }
136                 it=allDistances.begin();
137                 
138                 cout << setprecision(8);
139
140                 vector<int> prevRow = it->second;
141                 it++;
142                 
143                 for(it;it!=allDistances.end();it++){
144                         for(int i=0;i<it->second.size();i++){
145                                 it->second[i] += prevRow[i];
146                         }
147                         prevRow = it->second;
148                 }
149                 
150                 vector<int> lastRow = allDistances.rbegin()->second;
151                 outCov << setprecision(8);
152                 
153                 outCov << "dist";
154                 for (int i = 0; i < numGroups; i++){
155                         outCov << '\t' << groupNames[i];
156                 }
157                 for (int i=0;i<numGroups;i++){
158                         for(int j=i+1;j<numGroups;j++){
159                                 outCov << '\t' << groupNames[i] << '-' << groupNames[j] << '\t';
160                                 outCov << groupNames[j] << '-' << groupNames[i];
161                         }
162                 }
163                 outCov << endl;
164                 
165                 for(it=allDistances.begin();it!=allDistances.end();it++){
166                         outCov << it->first << '\t';
167                         for(int i=0;i<numGroups;i++){
168                                 outCov << it->second[indices[i][i]]/(float)lastRow[indices[i][i]] << '\t';
169                         }
170                         for(int i=0;i<numGroups;i++){
171                                 for(int j=i+1;j<numGroups;j++){
172                                         outCov << it->second[indices[i][j]]/(float)lastRow[indices[i][j]] << '\t';
173                                         outCov << it->second[indices[j][i]]/(float)lastRow[indices[j][i]] << '\t';
174                                 }
175                         }
176                         outCov << endl;
177                 }
178                 
179         }
180         catch(exception& e) {
181                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182                 exit(1);
183         }
184         catch(...) {
185                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
186                 exit(1);
187         }       
188
189
190 //**********************************************************************************************************************
191
192 void LibShuffCommand::printSummaryFile() {
193         try {
194
195                 ofstream outSum;
196                 summaryFile = getRootName(globaldata->getPhylipFile()) + "libshuff.summary";
197                 openOutputFile(summaryFile, outSum);
198
199                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
200                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
201                 
202                 cout << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl;
203                 outSum << setw(20) << left << "Comparison" << '\t' << setprecision(8) << "dCXYScore" << '\t' << "Significance" << endl;
204         
205                 int precision = (int)log10(iters);
206                 for(int i=0;i<numGroups;i++){
207                         for(int j=i+1;j<numGroups;j++){
208                                 if(pValueCounts[i][j]){
209                                         cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
210                                         outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << setprecision(precision) << pValueCounts[i][j]/(float)iters << endl;
211                                 }
212                                 else{
213                                         cout << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
214                                         outSum << setw(20) << left << groupNames[i]+'-'+groupNames[j] << '\t' << setprecision(8) << savedDXYValues[i][j] << '\t' << '<' <<setprecision(precision) << 1/(float)iters << endl;
215                                 }
216                                 if(pValueCounts[j][i]){
217                                         cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
218                                         outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << setprecision (precision) << pValueCounts[j][i]/(float)iters << endl;
219                                 }
220                                 else{
221                                         cout << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
222                                         outSum << setw(20) << left << groupNames[j]+'-'+groupNames[i] << '\t' << setprecision(8) << savedDXYValues[j][i] << '\t' << '<' <<setprecision (precision) << 1/(float)iters << endl;
223                                 }
224                         }
225                 }
226                 
227                 
228         }
229         catch(exception& e) {
230                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231                 exit(1);
232         }
233         catch(...) {
234                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
235                 exit(1);
236         }       
237
238
239 //**********************************************************************************************************************
240
241 void LibShuffCommand::setGroups() {
242         try {
243                 //if the user has not entered specific groups to analyze then do them all
244                 if (globaldata->Groups.size() == 0) {
245                         numGroups = globaldata->gGroupmap->getNumGroups();
246                         for (int i=0; i < numGroups; i++) { 
247                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
248                         }
249                 } else {
250                         if (globaldata->getGroups() != "all") {
251                                 //check that groups are valid
252                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
253                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
254                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
255                                                 // erase the invalid group from globaldata->Groups
256                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
257                                         }
258                                 }
259                         
260                                 //if the user only entered invalid groups
261                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
262                                         numGroups = globaldata->gGroupmap->getNumGroups();
263                                         for (int i=0; i < numGroups; i++) { 
264                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
265                                         }
266                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
267                                 } else { numGroups = globaldata->Groups.size(); }
268                         } else { //users wants all groups
269                                 numGroups = globaldata->gGroupmap->getNumGroups();
270                                 globaldata->Groups.clear();
271                                 for (int i=0; i < numGroups; i++) { 
272                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
273                                 }
274                         }
275                 }
276
277                 //sort so labels match
278                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
279                 
280                 //sort
281                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
282
283                 groupNames = globaldata->Groups;
284
285                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
286 //              for (int i=0; i<numGroups; i++) { 
287 //                      for (int l = 0; l < numGroups; l++) {
288 //                              //set group comparison labels
289 //                              groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
290 //                      }
291 //              }
292         }
293         catch(exception& e) {
294                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
295                 exit(1);
296         }
297         catch(...) {
298                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
299                 exit(1);
300         }
301 }
302
303 /***********************************************************/