]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
fixed libshuff signif. values
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11         you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
12         paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
13         DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
14
15
16 #include "libshuffcommand.h"
17
18 //**********************************************************************************************************************
19
20
21 LibShuffCommand::LibShuffCommand(){
22         try {
23                 globaldata = GlobalData::getInstance();
24                 convert(globaldata->getCutOff(), cutOff);
25                 convert(globaldata->getIters(), iters);
26                 convert(globaldata->getStep(), step);
27                 form = globaldata->getForm();
28                 matrix = globaldata->gMatrix;
29                 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
30                 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
31                 openOutputFile(coverageFile, out);
32                 openOutputFile(summaryFile, outSum);
33                 
34                 //set the groups to be analyzed
35                 setGroups();
36
37                 //file headers for coverage file
38                 out << "D" << '\t';
39                 for (int i = 0; i < groupComb.size(); i++) {
40                         out << "C" + groupComb[i] << '\t';
41                 }
42                 
43                 for (int i = 0; i < numGroups; i++) {
44                         for (int j = 0; j < numGroups; j++) {
45                                 //don't output AA to AA
46                                 if (i != j) {
47                                         out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
48                                 }
49                         }
50                 }
51                 out << endl;
52
53                 numComp = numGroups*numGroups;
54                 
55                 coverage = new Coverage();
56                 
57         }
58         catch(exception& e) {
59                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
60                 exit(1);
61         }
62         catch(...) {
63                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
64                 exit(1);
65         }       
66                         
67 }
68
69 //**********************************************************************************************************************
70
71 LibShuffCommand::~LibShuffCommand(){
72         delete coverage;
73 }
74
75 //**********************************************************************************************************************
76
77 int LibShuffCommand::execute(){
78         try {
79                 //deltaValues[0] = scores for the difference between AA and AB.
80                 //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
81                 Progress* reading;
82                 reading = new Progress("Comparing to random:", iters);
83                 
84                 sumDelta.resize(numComp-numGroups, 0.0);
85                 
86                 matrix->setBounds();
87                 
88                 //load distances
89                 if (form != "discrete") { matrix->getDist(dist); }
90                 else {
91                         float f = 0.0;
92                         while (f <= cutOff) {
93                                 dist.push_back(f);
94                                 f += step;
95                         }
96                 }
97         
98                 /*****************************/
99                 //get values for users matrix
100                 /*****************************/
101                         
102                 //clear out old Values
103                 deltaValues.clear();
104                 deltaValues.resize(dist.size());                        
105                 
106                 coverage->getValues(matrix, cValues, dist);
107                 
108                 float distDiff = 0;
109                 
110                 //loop through each distance and load sumdelta
111                 for (int p = 0; p < cValues.size(); p++) {      
112                         //find delta values
113                         int count = 0;
114                         for (int i = 0; i < numGroups; i++) {
115                                 for (int j = 0; j < numGroups; j++) {
116                                         //don't save AA to AA
117                                         if (i != j) {
118                                                 //(Caa - Cab)^2 * distDiff
119                                                 deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); //* distDiff
120                                                 sumDelta[count] += deltaValues[p][count];
121                                                 count++;
122                                         }
123                                 }
124                         }
125                         if (p < cValues.size() - 1) {
126                                 distDiff = dist[p+1] - dist[p]; 
127                         }
128                 }
129                         
130                 printCoverageFile();
131                         
132                 /*******************************************************************************/
133                 //create and score random matrixes finding the sumDelta values for summary file
134                 /******************************************************************************/
135
136                 //initialize rsumDelta
137                 rsumDelta.resize(numComp-numGroups);
138                 for (int l = 0; l < rsumDelta.size(); l++) {
139                         for (int w = 0; w < iters; w++) {
140                                 rsumDelta[l].push_back(0.0);
141                         }
142                 }
143                 
144                 
145                 for (int m = 0; m < iters; m++) {
146                         //generate random matrix in getValues
147                         //values for random matrix
148                 
149                         coverage->getValues(matrix, cValues, dist, "random");
150                         
151                         //loop through each distance and load rsumdelta
152                         for (int p = 0; p < cValues.size(); p++) {
153                                 //find delta values
154                                 int count = 0;
155                                 for (int i = 0; i < numGroups; i++) {
156                                         for (int j = 0; j < numGroups; j++) {
157                                                 //don't save AA to AA
158                                                 if (i != j) {
159                                                         //rsumDelta[3][500] = the sum of the delta scores for BB-BC for random matrix # 500.
160                                                         rsumDelta[count][m] += cValues[p][i][j];  // where cValues[p][0][1] = delta value at distance p of AA-AB, cValues[p][1][2] = delta value at distance p of BB-BC. 
161                                                         count++;
162                                                 }
163                                         }
164                                 }
165                         }
166
167                         //clear out old Values
168                         reading->update(m);
169                         cValues.clear();
170                         
171                 }
172                 
173                 reading->finish();
174                 delete reading;
175                                 
176                 /**********************************************************/
177                 //find the signifigance of the user matrix' sumdelta values
178                 /**********************************************************/
179                 
180                 for (int t = 0; t < rsumDelta.size(); t++) {
181                         //sort rsumDelta t
182                         sort(rsumDelta[t].begin(), rsumDelta[t].end());
183                         
184                         //the index of the score higher than yours is returned 
185                         //so if you have 1000 random matrices the index returned is 100 
186                         //then there are 900 matrices with a score greater then you. 
187                         //giving you a signifigance of 0.900
188                         int index = findIndex(sumDelta[t], t);    
189                         
190                         //the signifigance is the number of trees with the users score or higher 
191                         sumDeltaSig.push_back((iters-index)/(float)iters);
192
193                 }
194                 
195                 printSummaryFile();
196                 
197                 //clear out users groups
198                 globaldata->Groups.clear();
199                 
200                 return 0;
201         }
202         catch(exception& e) {
203                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
204                 exit(1);
205         }
206         catch(...) {
207                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
208                 exit(1);
209         }       
210 }
211 //**********************************************************************************************************************
212 void LibShuffCommand::printCoverageFile() {
213         try {
214                 //format output
215                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
216                 
217                 //loop through each distance 
218                 for (int p = 0; p < cValues.size(); p++) {
219                         out << setprecision(6) << dist[p] << '\t';
220                         //print out coverage values
221                         for (int i = 0; i < numGroups; i++) {
222                                 for (int j = 0; j < numGroups; j++) {
223                                         out << cValues[p][i][j] << '\t';
224                                 }
225                         }
226                         
227                         for (int h = 0; h < deltaValues[p].size(); h++) {
228                                 out << deltaValues[p][h] << '\t';
229                         }
230                         
231                         out << endl;
232                 }
233                 
234         }
235         catch(exception& e) {
236                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }
239         catch(...) {
240                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
241                 exit(1);
242         }       
243
244 //**********************************************************************************************************************
245 void LibShuffCommand::printSummaryFile() {
246         try {
247                 //format output
248                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
249                 
250                 for (int i = 0; i < numGroups; i++) {
251                         for (int j = 0; j < numGroups; j++) {
252                                 //don't output AA to AA
253                                 if (i != j) {
254                                         outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
255                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
256                                 }
257                         }
258                 }
259                 outSum << endl;
260                 cout << endl;
261                 
262                 //print out delta values
263                 for (int i = 0; i < sumDelta.size(); i++) {
264                         if (sumDeltaSig[i] > (1/(float)iters)) {
265                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
266                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
267                         }else {
268                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
269                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
270                         }
271                 }
272                 
273                 outSum << endl;
274                 cout << endl;
275         
276         }
277         catch(exception& e) {
278                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }
281         catch(...) {
282                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283                 exit(1);
284         }       
285
286
287 //**********************************************************************************************************************
288 void LibShuffCommand::setGroups() {
289         try {
290                 //if the user has not entered specific groups to analyze then do them all
291                 if (globaldata->Groups.size() == 0) {
292                         numGroups = globaldata->gGroupmap->getNumGroups();
293                         for (int i=0; i < numGroups; i++) { 
294                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
295                         }
296                 }else {
297                         if (globaldata->getGroups() != "all") {
298                                 //check that groups are valid
299                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
300                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
301                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
302                                                 // erase the invalid group from globaldata->Groups
303                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
304                                         }
305                                 }
306                         
307                                 //if the user only entered invalid groups
308                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
309                                         numGroups = globaldata->gGroupmap->getNumGroups();
310                                         for (int i=0; i < numGroups; i++) { 
311                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
312                                         }
313                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
314                                 }else { numGroups = globaldata->Groups.size(); }
315                         }else { //users wants all groups
316                                 numGroups = globaldata->gGroupmap->getNumGroups();
317                                 globaldata->Groups.clear();
318                                 for (int i=0; i < numGroups; i++) { 
319                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
320                                 }
321                         }
322                 }
323                 
324                 //sort so labels match
325                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
326                 
327                 //sort
328                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
329
330                 
331                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
332                 for (int i=0; i<numGroups; i++) { 
333                         for (int l = 0; l < numGroups; l++) {
334                                 //set group comparison labels
335                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
336                         }
337                 }
338         }
339         catch(exception& e) {
340                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
341                 exit(1);
342         }
343         catch(...) {
344                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }
347 }
348 /***********************************************************/
349 int LibShuffCommand::findIndex(float score, int index) {
350         try{
351                 for (int i = 0; i < rsumDelta[index].size(); i++) {
352                         if (rsumDelta[index][i] >= score)       {       return i;       }
353                 }
354                 return rsumDelta[index].size();
355         }
356         catch(exception& e) {
357                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
358                 exit(1);
359         }
360         catch(...) {
361                 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
362                 exit(1);
363         }
364 }
365
366 /***********************************************************/
367