]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
put back no command and worked on libshuff
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11         you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
12         paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
13         DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
14
15
16 #include "libshuffcommand.h"
17
18 //**********************************************************************************************************************
19
20
21 LibShuffCommand::LibShuffCommand(){
22         try {
23                 globaldata = GlobalData::getInstance();
24                 convert(globaldata->getCutOff(), cutOff);
25                 convert(globaldata->getIters(), iters);
26                 convert(globaldata->getStep(), step);
27                 form = globaldata->getForm();
28                 matrix = globaldata->gMatrix;
29                 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
30                 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
31                 openOutputFile(coverageFile, out);
32                 openOutputFile(summaryFile, outSum);
33                 
34                 //set the groups to be analyzed
35                 setGroups();
36
37                 //file headers for coverage file
38                 out << "D" << '\t';
39                 for (int i = 0; i < groupComb.size(); i++) {
40                         out << "C" + groupComb[i] << '\t';
41                 }
42                 
43                 for (int i = 0; i < numGroups; i++) {
44                         for (int j = 0; j < numGroups; j++) {
45                                 //don't output AA to AA
46                                 if (i != j) {
47                                         out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
48                                 }
49                         }
50                 }
51                 out << endl;
52
53                 numComp = numGroups*numGroups;
54                 
55                 coverage = new Coverage();
56                 
57         }
58         catch(exception& e) {
59                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
60                 exit(1);
61         }
62         catch(...) {
63                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
64                 exit(1);
65         }       
66                         
67 }
68
69 //**********************************************************************************************************************
70
71 LibShuffCommand::~LibShuffCommand(){
72         delete coverage;
73 }
74
75 //**********************************************************************************************************************
76
77 int LibShuffCommand::execute(){
78         try {
79                 //deltaValues[0] = scores for the difference between AA and AB.
80                 //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
81                 Progress* reading;
82                 reading = new Progress("Comparing to random:", iters);
83                 
84                 sumDelta.resize(numComp-numGroups, 0.0);
85                 
86                 matrix->setBounds();
87                 
88                 //load distances
89                 if (form != "discrete") { matrix->getDist(dist); }
90                 else {
91                         float f = 0.0;
92                         while (f <= cutOff) {
93                                 dist.push_back(f);
94                                 f += step;
95                         }
96                 }
97         
98                 /*****************************/
99                 //get values for users matrix
100                 /*****************************/
101                         
102                 //clear out old Values
103                 deltaValues.clear();
104                 deltaValues.resize(dist.size());                        
105                 
106                 coverage->getValues(matrix, cValues, dist, "user");
107                 
108                 float distDiff = dist[0];
109                 
110                 //loop through each distance and load rsumdelta
111                 for (int p = 0; p < cValues.size(); p++) {      
112                         //find delta values
113                         int count = 0;
114                         for (int i = 0; i < numGroups; i++) {
115                                 for (int j = 0; j < numGroups; j++) {
116                                         //don't save AA to AA
117                                         if (i != j) {
118                                                 //(Caa - Cab)^2
119                                                 deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff);
120                                                 sumDelta[count] += deltaValues[p][count];
121                                                 count++;
122                                         }
123                                 }
124                         }
125                         if (p < cValues.size() - 1) {
126                                         distDiff = dist[p+1] - dist[p]; 
127 //cout << distDiff << endl;
128                         }
129                 }
130                         
131                 printCoverageFile();
132                         
133                 /*******************************************************************************/
134                 //create and score random matrixes finding the sumDelta values for summary file
135                 /******************************************************************************/
136
137                 //initialize rsumDelta
138                 rsumDelta.resize(numComp-numGroups);
139                 for (int l = 0; l < rsumDelta.size(); l++) {
140                         for (int w = 0; w < iters; w++) {
141                                 rsumDelta[l].push_back(0.0);
142                         }
143                 }
144                 
145                 
146                 for (int m = 0; m < iters; m++) {
147                         //generate random matrix in getValues
148                         //values for random matrix
149                 
150                         coverage->getValues(matrix, cValues, dist, "random");
151                         
152                         distDiff = 1;
153                         
154                         //loop through each distance and load rsumdelta
155                         for (int p = 0; p < cValues.size(); p++) {
156                                 //find delta values
157                                 int count = 0;
158                                 for (int i = 0; i < numGroups; i++) {
159                                         for (int j = 0; j < numGroups; j++) {
160                                                 //don't save AA to AA
161                                                 if (i != j) {
162                                                         //(Caa - Cab)^2
163                                                         rsumDelta[count][m] += (((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff);
164                                                         count++;
165                                                 }
166                                         }
167                                 }
168                                 
169                                 if (p < cValues.size() - 1) {
170                                         distDiff = dist[p+1] - dist[p]; 
171                                 }
172                         }
173
174                         //clear out old Values
175                         reading->update(m);
176                         cValues.clear();
177                         
178                 }
179                 
180                 reading->finish();
181                 delete reading;
182                                 
183                 /**********************************************************/
184                 //find the signifigance of the user matrix' sumdelta values
185                 /**********************************************************/
186                 
187                 for (int t = 0; t < rsumDelta.size(); t++) {
188                         //sort rsumDelta t
189                         sort(rsumDelta[t].begin(), rsumDelta[t].end());
190                         
191                         //the index of the score higher than yours is returned 
192                         //so if you have 1000 random matrices the index returned is 100 
193                         //then there are 900 matrices with a score greater then you. 
194                         //giving you a signifigance of 0.900
195                         int index = findIndex(sumDelta[t], t);    
196                         
197                         //the signifigance is the number of trees with the users score or higher 
198                         sumDeltaSig.push_back((iters-index)/(float)iters);
199
200                 }
201                 
202                 printSummaryFile();
203                 
204                 //clear out users groups
205                 globaldata->Groups.clear();
206                 
207                 return 0;
208         }
209         catch(exception& e) {
210                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
211                 exit(1);
212         }
213         catch(...) {
214                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
215                 exit(1);
216         }       
217 }
218 //**********************************************************************************************************************
219 void LibShuffCommand::printCoverageFile() {
220         try {
221                 //format output
222                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
223                 
224                 //loop through each distance 
225                 for (int p = 0; p < cValues.size(); p++) {
226                         out << setprecision(6) << dist[p] << '\t';
227                         //print out coverage values
228                         for (int i = 0; i < numGroups; i++) {
229                                 for (int j = 0; j < numGroups; j++) {
230                                         out << cValues[p][i][j] << '\t';
231                                 }
232                         }
233                         
234                         for (int h = 0; h < deltaValues[p].size(); h++) {
235                                 out << deltaValues[p][h] << '\t';
236                         }
237                         
238                         out << endl;
239                 }
240                 
241         }
242         catch(exception& e) {
243                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
244                 exit(1);
245         }
246         catch(...) {
247                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248                 exit(1);
249         }       
250
251 //**********************************************************************************************************************
252 void LibShuffCommand::printSummaryFile() {
253         try {
254                 //format output
255                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
256                 
257                 for (int i = 0; i < numGroups; i++) {
258                         for (int j = 0; j < numGroups; j++) {
259                                 //don't output AA to AA
260                                 if (i != j) {
261                                         outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
262                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
263                                 }
264                         }
265                 }
266                 outSum << endl;
267                 cout << endl;
268                 
269                 //print out delta values
270                 for (int i = 0; i < sumDelta.size(); i++) {
271                         if (sumDeltaSig[i] > (1/(float)iters)) {
272                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
273                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
274                         }else {
275                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
276                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
277                         }
278                 }
279                 
280                 outSum << endl;
281                 cout << endl;
282         
283         }
284         catch(exception& e) {
285                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
286                 exit(1);
287         }
288         catch(...) {
289                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
290                 exit(1);
291         }       
292
293
294 //**********************************************************************************************************************
295 void LibShuffCommand::setGroups() {
296         try {
297                 //if the user has not entered specific groups to analyze then do them all
298                 if (globaldata->Groups.size() == 0) {
299                         numGroups = globaldata->gGroupmap->getNumGroups();
300                         for (int i=0; i < numGroups; i++) { 
301                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
302                         }
303                 }else {
304                         if (globaldata->getGroups() != "all") {
305                                 //check that groups are valid
306                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
307                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
308                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
309                                                 // erase the invalid group from globaldata->Groups
310                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
311                                         }
312                                 }
313                         
314                                 //if the user only entered invalid groups
315                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
316                                         numGroups = globaldata->gGroupmap->getNumGroups();
317                                         for (int i=0; i < numGroups; i++) { 
318                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
319                                         }
320                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
321                                 }else { numGroups = globaldata->Groups.size(); }
322                         }else { //users wants all groups
323                                 numGroups = globaldata->gGroupmap->getNumGroups();
324                                 globaldata->Groups.clear();
325                                 for (int i=0; i < numGroups; i++) { 
326                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
327                                 }
328                         }
329                 }
330                 
331                 //sort so labels match
332                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
333                 
334                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
335                 for (int i=0; i<numGroups; i++) { 
336                         for (int l = 0; l < numGroups; l++) {
337                                 //set group comparison labels
338                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
339                         }
340                 }
341         }
342         catch(exception& e) {
343                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
344                 exit(1);
345         }
346         catch(...) {
347                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
348                 exit(1);
349         }
350 }
351 /***********************************************************/
352 int LibShuffCommand::findIndex(float score, int index) {
353         try{
354                 for (int i = 0; i < rsumDelta[index].size(); i++) {
355                         if (rsumDelta[index][i] >= score)       {       return i;       }
356                 }
357                 return rsumDelta[index].size();
358         }
359         catch(exception& e) {
360                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
361                 exit(1);
362         }
363         catch(...) {
364                 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
365                 exit(1);
366         }
367 }
368
369 /***********************************************************/
370