]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
working on libshuff
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "libshuffcommand.h"
11
12 //**********************************************************************************************************************
13
14
15 LibShuffCommand::LibShuffCommand(){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 convert(globaldata->getCutOff(), cutOff);
19                 convert(globaldata->getIters(), iters);
20                 convert(globaldata->getStep(), step);
21                 form = globaldata->getForm();
22                 matrix = globaldata->gMatrix;
23                 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
24                 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
25                 openOutputFile(coverageFile, out);
26                 openOutputFile(summaryFile, outSum);
27                 
28                 //set the groups to be analyzed
29                 setGroups();
30
31                 //file headers for coverage file
32                 out << "D" << '\t';
33                 for (int i = 0; i < groupComb.size(); i++) {
34                         out << "C" + groupComb[i] << '\t';
35                 }
36                 
37                 for (int i = 0; i < numGroups; i++) {
38                         for (int j = 0; j < numGroups; j++) {
39                                 //don't output AA to AA
40                                 if (i != j) {
41                                         out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
42                                 }
43                         }
44                 }
45                 out << endl;
46
47                 numComp = numGroups*numGroups;
48                 
49                 coverage = new Coverage();
50                 
51         }
52         catch(exception& e) {
53                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
54                 exit(1);
55         }
56         catch(...) {
57                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
58                 exit(1);
59         }       
60                         
61 }
62
63 //**********************************************************************************************************************
64
65 LibShuffCommand::~LibShuffCommand(){
66         delete coverage;
67 }
68
69 //**********************************************************************************************************************
70
71 int LibShuffCommand::execute(){
72         try {
73                 //deltaValues[0] = scores for the difference between AA and AB.
74                 //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
75                 vector<float> dist;
76                 int next;
77                 
78                 sumDelta.resize(numComp-numGroups, 0.0);
79                 
80                 float D = 0.0;
81         
82                 /*****************************/
83                 //get values for users matrix
84                 /*****************************/
85                 matrix->setBounds();
86                 
87                 if (form != "discrete") { matrix->getDist(dist); next = 1; }
88 //cout << "Distances" << endl;
89 //for (int i = 0; i < dist.size(); i++) { cout << dist[i] << " "; }     
90 //cout << endl;
91         
92                 //get values for users matrix
93                 while (D <= cutOff) {
94                         //clear out old Values
95                         deltaValues.clear();                    
96                         coverage->getValues(matrix, D, cValues);
97                         
98                         //find delta values
99                         int count = 0;
100                         for (int i = 0; i < numGroups; i++) {
101                                 for (int j = 0; j < numGroups; j++) {
102                                         //don't save AA to AA
103                                         if (i != j) {
104                                                 //(Caa - Cab)^2
105                                                 deltaValues.push_back( (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])) ); 
106                                                 sumDelta[count] += deltaValues[count];
107                                                 count++;
108                                         }
109                                 }
110                         }
111                         
112                         printCoverageFile(D);
113                         
114                         //check form
115                         if (form != "discrete") {   
116                                 if (next == dist.size()) { break; }
117                                 else {  D = dist[next];  next++;        }
118                         }else {  D += step;  }
119                         
120
121                 }
122                 
123                 //output sum Deltas
124                 for (int i = 0; i < numGroups; i++) {
125                         for (int j = 0; j < numGroups; j++) {
126                                 //don't output AA to AA
127                                 if (i != j) {
128                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
129                                 }
130                         }
131                 }
132                 cout << endl;
133                 
134                 for (int i = 0; i < sumDelta.size(); i++) {
135                         cout << setprecision(6) << sumDelta[i] << '\t';
136                 }
137                 cout << endl;
138                                 
139                 /*******************************************************************************/
140                 //create and score random matrixes finding the sumDelta values for summary file
141                 /******************************************************************************/
142
143                 //initialize rsumDelta
144                 rsumDelta.resize(numComp-numGroups);
145                 for (int l = 0; l < rsumDelta.size(); l++) {
146                         for (int w = 0; w < iters; w++) {
147                                 rsumDelta[l].push_back(0.0);
148                         }
149                 }
150                 
151                 
152                 for (int m = 0; m < iters; m++) {
153                         //generate random matrix in getValues
154                         //values for random matrix
155                         cout << "Iteration " << m+1 << endl;
156                         D = 0.0;
157                         next = 1;
158                         
159                         while (D <= cutOff) {
160                                 coverage->getValues(matrix, D, cValues, "random");
161                         
162                                 //find delta values
163                                 int count = 0;
164                                 for (int i = 0; i < numGroups; i++) {
165                                         for (int j = 0; j < numGroups; j++) {
166                                                 //don't save AA to AA
167                                                 if (i != j) {
168                                                         //(Caa - Cab)^2
169                                                         rsumDelta[count][m] += ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])));
170 //cout << "iter " << m << " box " << i << j << " delta = " << ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))) << endl;
171                                                         count++;
172                                                 }
173                                         }
174                                 }
175
176                                 //check form
177                                 if (form != "discrete") {   
178                                         if (next == dist.size()) { break; }
179                                         else {  D = dist[next];  next++;        }
180                                 }else {  D += step;  }
181
182                         
183                                 //clear out old Values
184                                 cValues.clear();
185                         }
186 cout << "random sum delta for iter " << m << endl;
187 for (int i = 0; i < rsumDelta.size(); i++) {
188         cout << setprecision(6) << rsumDelta[i][m] << '\t';
189 }
190 cout << endl;
191
192                 }
193                 
194                 /**********************************************************/
195                 //find the signifigance of the user matrix' sumdelta values
196                 /**********************************************************/
197                 
198                 for (int t = 0; t < rsumDelta.size(); t++) {
199                         //sort rsumDelta t
200                         sort(rsumDelta[t].begin(), rsumDelta[t].end());
201                         
202                         //the index of the score higher than yours is returned 
203                         //so if you have 1000 random matrices the index returned is 100 
204                         //then there are 900 matrices with a score greater then you. 
205                         //giving you a signifigance of 0.900
206                         int index = findIndex(sumDelta[t], t);    
207                         
208                         //the signifigance is the number of trees with the users score or higher 
209                         sumDeltaSig.push_back((iters-index)/(float)iters);
210
211                 }
212                 
213                 printSummaryFile();
214                 
215                 //clear out users groups
216                 globaldata->Groups.clear();
217                 
218                 return 0;
219         }
220         catch(exception& e) {
221                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
222                 exit(1);
223         }
224         catch(...) {
225                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
226                 exit(1);
227         }       
228 }
229 //**********************************************************************************************************************
230 void LibShuffCommand::printCoverageFile(float d) {
231         try {
232                 //format output
233                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
234                 
235                 out << setprecision(6) << d << '\t';
236                 
237                 //print out coverage values
238                 for (int i = 0; i < numGroups; i++) {
239                         for (int j = 0; j < numGroups; j++) {
240                                 out << cValues[i][j] << '\t';
241                         }
242                 }
243                 
244                 //print out delta values
245                 for (int i = 0; i < deltaValues.size(); i++) {
246                         out << deltaValues[i] << '\t';
247                 }
248                 
249                 out << endl;
250         
251         }
252         catch(exception& e) {
253                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
254                 exit(1);
255         }
256         catch(...) {
257                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
258                 exit(1);
259         }       
260
261 //**********************************************************************************************************************
262 void LibShuffCommand::printSummaryFile() {
263         try {
264                 //format output
265                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
266                 
267                 for (int i = 0; i < numGroups; i++) {
268                         for (int j = 0; j < numGroups; j++) {
269                                 //don't output AA to AA
270                                 if (i != j) {
271                                         outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
272                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
273                                 }
274                         }
275                 }
276                 outSum << endl;
277                 cout << endl;
278                 
279                 //print out delta values
280                 for (int i = 0; i < sumDelta.size(); i++) {
281                         outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
282                         cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
283                 }
284                 
285                 outSum << endl;
286                 cout << endl;
287         
288         }
289         catch(exception& e) {
290                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
291                 exit(1);
292         }
293         catch(...) {
294                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
295                 exit(1);
296         }       
297
298
299 //**********************************************************************************************************************
300 void LibShuffCommand::setGroups() {
301         try {
302                 //if the user has not entered specific groups to analyze then do them all
303                 if (globaldata->Groups.size() == 0) {
304                         numGroups = globaldata->gGroupmap->getNumGroups();
305                         for (int i=0; i < numGroups; i++) { 
306                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
307                         }
308                 }else {
309                         if (globaldata->getGroups() != "all") {
310                                 //check that groups are valid
311                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
312                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
313                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
314                                                 // erase the invalid group from globaldata->Groups
315                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
316                                         }
317                                 }
318                         
319                                 //if the user only entered invalid groups
320                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
321                                         numGroups = globaldata->gGroupmap->getNumGroups();
322                                         for (int i=0; i < numGroups; i++) { 
323                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
324                                         }
325                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
326                                 }else { numGroups = globaldata->Groups.size(); }
327                         }else { //users wants all groups
328                                 numGroups = globaldata->gGroupmap->getNumGroups();
329                                 globaldata->Groups.clear();
330                                 for (int i=0; i < numGroups; i++) { 
331                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
332                                 }
333                         }
334                 }
335                 
336                 //sort so labels match
337                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
338                 
339                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
340                 for (int i=0; i<numGroups; i++) { 
341                         for (int l = 0; l < numGroups; l++) {
342                                 //set group comparison labels
343                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
344                         }
345                 }
346         }
347         catch(exception& e) {
348                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }
351         catch(...) {
352                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
353                 exit(1);
354         }
355 }
356 /***********************************************************/
357 int LibShuffCommand::findIndex(float score, int index) {
358         try{
359                 for (int i = 0; i < rsumDelta[index].size(); i++) {
360                         if (rsumDelta[index][i] >= score)       {       return i;       }
361                 }
362                 return rsumDelta[index].size();
363         }
364         catch(exception& e) {
365                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
366                 exit(1);
367         }
368         catch(...) {
369                 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370                 exit(1);
371         }
372 }
373
374 /***********************************************************/
375