]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
documentation
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11         you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
12         paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
13         DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
14
15
16 #include "libshuffcommand.h"
17
18 //**********************************************************************************************************************
19
20
21 LibShuffCommand::LibShuffCommand(){
22         try {
23                 globaldata = GlobalData::getInstance();
24                 convert(globaldata->getCutOff(), cutOff);
25                 convert(globaldata->getIters(), iters);
26                 convert(globaldata->getStep(), step);
27                 form = globaldata->getForm();
28                 matrix = globaldata->gMatrix;
29                 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
30                 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
31                 openOutputFile(coverageFile, out);
32                 openOutputFile(summaryFile, outSum);
33                 
34                 //set the groups to be analyzed
35                 setGroups();
36
37                 //file headers for coverage file
38                 out << "D" << '\t';
39                 for (int i = 0; i < groupComb.size(); i++) {
40                         out << "C" + groupComb[i] << '\t';
41                 }
42                 
43                 for (int i = 0; i < numGroups; i++) {
44                         for (int j = 0; j < numGroups; j++) {
45                                 //don't output AA to AA
46                                 if (i != j) {
47                                         out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
48                                 }
49                         }
50                 }
51                 out << endl;
52
53                 numComp = numGroups*numGroups;
54                 
55                 coverage = new Coverage();
56                 
57         }
58         catch(exception& e) {
59                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
60                 exit(1);
61         }
62         catch(...) {
63                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
64                 exit(1);
65         }       
66                         
67 }
68
69 //**********************************************************************************************************************
70
71 LibShuffCommand::~LibShuffCommand(){
72         delete coverage;
73 }
74
75 //**********************************************************************************************************************
76
77 int LibShuffCommand::execute(){
78         try {
79                 //deltaValues[0] = scores for the difference between AA and AB.
80                 //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
81                 Progress* reading;
82                 reading = new Progress("Comparing to random:", iters);
83                 
84                 sumDelta.resize(numComp-numGroups, 0.0);
85                 
86                 matrix->setBounds();
87                 
88                 //load distances
89                 if (form != "discrete") { matrix->getDist(dist); }
90                 else {
91                         float f = 0.0;
92                         while (f <= cutOff) {
93                                 dist.push_back(f);
94                                 f += step;
95                         }
96                 }
97         
98                 /*****************************/
99                 //get values for users matrix
100                 /*****************************/
101                         
102                 //clear out old Values
103                 deltaValues.clear();
104                 deltaValues.resize(dist.size());                        
105                 
106                 coverage->getValues(matrix, cValues, dist, "user");
107                 
108                 //loop through each distance and load rsumdelta
109                 for (int p = 0; p < cValues.size(); p++) {      
110                         //find delta values
111                         int count = 0;
112                         for (int i = 0; i < numGroups; i++) {
113                                 for (int j = 0; j < numGroups; j++) {
114                                         //don't save AA to AA
115                                         if (i != j) {
116                                                 //(Caa - Cab)^2
117                                                 deltaValues[p].push_back((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j]));
118                                                 sumDelta[count] += deltaValues[p][count];
119                                                 count++;
120                                         }
121                                 }
122                         }
123                 }
124                         
125                 printCoverageFile();
126                         
127                 /*******************************************************************************/
128                 //create and score random matrixes finding the sumDelta values for summary file
129                 /******************************************************************************/
130
131                 //initialize rsumDelta
132                 rsumDelta.resize(numComp-numGroups);
133                 for (int l = 0; l < rsumDelta.size(); l++) {
134                         for (int w = 0; w < iters; w++) {
135                                 rsumDelta[l].push_back(0.0);
136                         }
137                 }
138                 
139                 
140                 for (int m = 0; m < iters; m++) {
141                         //generate random matrix in getValues
142                         //values for random matrix
143                 
144                         coverage->getValues(matrix, cValues, dist, "random");
145                         
146                         //loop through each distance and load rsumdelta
147                         for (int p = 0; p < cValues.size(); p++) {
148                                 //find delta values
149                                 int count = 0;
150                                 for (int i = 0; i < numGroups; i++) {
151                                         for (int j = 0; j < numGroups; j++) {
152                                                 //don't save AA to AA
153                                                 if (i != j) {
154                                                         //(Caa - Cab)^2
155                                                         rsumDelta[count][m] += ((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j]));
156                                                         count++;
157                                                 }
158                                         }
159                                 }
160                                 
161                         }
162
163                         //clear out old Values
164                         reading->update(m);
165                         cValues.clear();
166                         
167                 }
168                 
169                 reading->finish();
170                 delete reading;
171                                 
172                 /**********************************************************/
173                 //find the signifigance of the user matrix' sumdelta values
174                 /**********************************************************/
175                 
176                 for (int t = 0; t < rsumDelta.size(); t++) {
177                         //sort rsumDelta t
178                         sort(rsumDelta[t].begin(), rsumDelta[t].end());
179                         
180                         //the index of the score higher than yours is returned 
181                         //so if you have 1000 random matrices the index returned is 100 
182                         //then there are 900 matrices with a score greater then you. 
183                         //giving you a signifigance of 0.900
184                         int index = findIndex(sumDelta[t], t);    
185                         
186                         //the signifigance is the number of trees with the users score or higher 
187                         sumDeltaSig.push_back((iters-index)/(float)iters);
188
189                 }
190                 
191                 printSummaryFile();
192                 
193                 //clear out users groups
194                 globaldata->Groups.clear();
195                 
196                 return 0;
197         }
198         catch(exception& e) {
199                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
200                 exit(1);
201         }
202         catch(...) {
203                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
204                 exit(1);
205         }       
206 }
207 //**********************************************************************************************************************
208 void LibShuffCommand::printCoverageFile() {
209         try {
210                 //format output
211                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
212                 
213                 //loop through each distance 
214                 for (int p = 0; p < cValues.size(); p++) {
215                         out << setprecision(6) << dist[p] << '\t';
216                         //print out coverage values
217                         for (int i = 0; i < numGroups; i++) {
218                                 for (int j = 0; j < numGroups; j++) {
219                                         out << cValues[p][i][j] << '\t';
220                                 }
221                         }
222                         
223                         for (int h = 0; h < deltaValues[p].size(); h++) {
224                                 out << deltaValues[p][h] << '\t';
225                         }
226                         
227                         out << endl;
228                 }
229                 
230         }
231         catch(exception& e) {
232                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
233                 exit(1);
234         }
235         catch(...) {
236                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }       
239
240 //**********************************************************************************************************************
241 void LibShuffCommand::printSummaryFile() {
242         try {
243                 //format output
244                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
245                 
246                 for (int i = 0; i < numGroups; i++) {
247                         for (int j = 0; j < numGroups; j++) {
248                                 //don't output AA to AA
249                                 if (i != j) {
250                                         outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
251                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
252                                 }
253                         }
254                 }
255                 outSum << endl;
256                 cout << endl;
257                 
258                 //print out delta values
259                 for (int i = 0; i < sumDelta.size(); i++) {
260                         if (sumDeltaSig[i] > (1/(float)iters)) {
261                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
262                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
263                         }else {
264                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
265                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
266                         }
267                 }
268                 
269                 outSum << endl;
270                 cout << endl;
271         
272         }
273         catch(exception& e) {
274                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
275                 exit(1);
276         }
277         catch(...) {
278                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }       
281
282
283 //**********************************************************************************************************************
284 void LibShuffCommand::setGroups() {
285         try {
286                 //if the user has not entered specific groups to analyze then do them all
287                 if (globaldata->Groups.size() == 0) {
288                         numGroups = globaldata->gGroupmap->getNumGroups();
289                         for (int i=0; i < numGroups; i++) { 
290                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
291                         }
292                 }else {
293                         if (globaldata->getGroups() != "all") {
294                                 //check that groups are valid
295                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
296                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
297                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
298                                                 // erase the invalid group from globaldata->Groups
299                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
300                                         }
301                                 }
302                         
303                                 //if the user only entered invalid groups
304                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
305                                         numGroups = globaldata->gGroupmap->getNumGroups();
306                                         for (int i=0; i < numGroups; i++) { 
307                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
308                                         }
309                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
310                                 }else { numGroups = globaldata->Groups.size(); }
311                         }else { //users wants all groups
312                                 numGroups = globaldata->gGroupmap->getNumGroups();
313                                 globaldata->Groups.clear();
314                                 for (int i=0; i < numGroups; i++) { 
315                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
316                                 }
317                         }
318                 }
319                 
320                 //sort so labels match
321                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
322                 
323                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
324                 for (int i=0; i<numGroups; i++) { 
325                         for (int l = 0; l < numGroups; l++) {
326                                 //set group comparison labels
327                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
328                         }
329                 }
330         }
331         catch(exception& e) {
332                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
333                 exit(1);
334         }
335         catch(...) {
336                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
337                 exit(1);
338         }
339 }
340 /***********************************************************/
341 int LibShuffCommand::findIndex(float score, int index) {
342         try{
343                 for (int i = 0; i < rsumDelta[index].size(); i++) {
344                         if (rsumDelta[index][i] >= score)       {       return i;       }
345                 }
346                 return rsumDelta[index].size();
347         }
348         catch(exception& e) {
349                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
350                 exit(1);
351         }
352         catch(...) {
353                 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
354                 exit(1);
355         }
356 }
357
358 /***********************************************************/
359