]> git.donarmstrong.com Git - mothur.git/blob - libshuffcommand.cpp
libshuff and updated help to include libshuff and modifications to read.dist.
[mothur.git] / libshuffcommand.cpp
1 /*
2  *  libshuffcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "libshuffcommand.h"
11
12 //**********************************************************************************************************************
13
14
15 LibShuffCommand::LibShuffCommand(){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 convert(globaldata->getCutOff(), cutOff);
19                 convert(globaldata->getIters(), iters);
20                 convert(globaldata->getStep(), step);
21                 form = globaldata->getForm();
22                 matrix = globaldata->gMatrix;
23                 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
24                 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
25                 openOutputFile(coverageFile, out);
26                 openOutputFile(summaryFile, outSum);
27                 
28                 //set the groups to be analyzed
29                 setGroups();
30
31                 //file headers for coverage file
32                 out << "D" << '\t';
33                 for (int i = 0; i < groupComb.size(); i++) {
34                         out << "C" + groupComb[i] << '\t';
35                 }
36                 
37                 for (int i = 0; i < numGroups; i++) {
38                         for (int j = 0; j < numGroups; j++) {
39                                 //don't output AA to AA
40                                 if (i != j) {
41                                         out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
42                                 }
43                         }
44                 }
45                 out << endl;
46
47                 numComp = numGroups*numGroups;
48                 
49                 coverage = new Coverage();
50                 
51         }
52         catch(exception& e) {
53                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
54                 exit(1);
55         }
56         catch(...) {
57                 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
58                 exit(1);
59         }       
60                         
61 }
62
63 //**********************************************************************************************************************
64
65 LibShuffCommand::~LibShuffCommand(){
66         delete coverage;
67 }
68
69 //**********************************************************************************************************************
70
71 int LibShuffCommand::execute(){
72         try {
73                 //deltaValues[0] = scores for the difference between AA and AB.
74                 //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
75                 
76                 sumDelta.resize(numComp-numGroups, 0.0);
77                 
78                 matrix->setBounds();
79                 
80                 //load distances
81                 if (form != "discrete") { matrix->getDist(dist); }
82                 else {
83                         float f = 0.0;
84                         while (f <= cutOff) {
85                                 dist.push_back(f);
86                                 f += step;
87                         }
88                 }
89         
90                 /*****************************/
91                 //get values for users matrix
92                 /*****************************/
93                         
94                 //clear out old Values
95                 deltaValues.clear();
96                 deltaValues.resize(dist.size());                        
97                 
98                 coverage->getValues(matrix, cValues, dist, "user");
99                 
100                 //loop through each distance and load rsumdelta
101                 for (int p = 0; p < cValues.size(); p++) {      
102                         //find delta values
103                         int count = 0;
104                         for (int i = 0; i < numGroups; i++) {
105                                 for (int j = 0; j < numGroups; j++) {
106                                         //don't save AA to AA
107                                         if (i != j) {
108                                                 //(Caa - Cab)^2
109                                                 deltaValues[p].push_back( (abs(cValues[p][i][i]-cValues[p][i][j]) * abs(cValues[p][i][i]-cValues[p][i][j])) ); 
110                                                 sumDelta[count] += deltaValues[p][count];
111                                                 count++;
112                                         }
113                                 }
114                         }
115                 }
116                         
117                 printCoverageFile();
118                         
119                 /*******************************************************************************/
120                 //create and score random matrixes finding the sumDelta values for summary file
121                 /******************************************************************************/
122
123                 //initialize rsumDelta
124                 rsumDelta.resize(numComp-numGroups);
125                 for (int l = 0; l < rsumDelta.size(); l++) {
126                         for (int w = 0; w < iters; w++) {
127                                 rsumDelta[l].push_back(0.0);
128                         }
129                 }
130                 
131                 
132                 for (int m = 0; m < iters; m++) {
133                         //generate random matrix in getValues
134                         //values for random matrix
135                 
136                         coverage->getValues(matrix, cValues, dist, "random");
137                         
138                         //loop through each distance and load rsumdelta
139                         for (int p = 0; p < cValues.size(); p++) {
140                                 //find delta values
141                                 int count = 0;
142                                 for (int i = 0; i < numGroups; i++) {
143                                         for (int j = 0; j < numGroups; j++) {
144                                                 //don't save AA to AA
145                                                 if (i != j) {
146                                                         //(Caa - Cab)^2
147                                                         rsumDelta[count][m] += ((abs(cValues[p][i][i]-cValues[p][i][j]) * abs(cValues[p][i][i]-cValues[p][i][j])));
148                                                         count++;
149                                                 }
150                                         }
151                                 }
152                                 
153                         }
154 //cout << "iter " << m << endl;
155                         //clear out old Values
156                         cValues.clear();
157                         
158 //cout << "random sum delta for iter " << m << endl;
159 //for (int i = 0; i < rsumDelta.size(); i++) {
160 //      cout << setprecision(6) << rsumDelta[i][m] << '\t';
161 //}
162 //cout << endl;
163
164                 }
165                 
166                                 
167                 /**********************************************************/
168                 //find the signifigance of the user matrix' sumdelta values
169                 /**********************************************************/
170                 
171                 for (int t = 0; t < rsumDelta.size(); t++) {
172                         //sort rsumDelta t
173                         sort(rsumDelta[t].begin(), rsumDelta[t].end());
174                         
175                         //the index of the score higher than yours is returned 
176                         //so if you have 1000 random matrices the index returned is 100 
177                         //then there are 900 matrices with a score greater then you. 
178                         //giving you a signifigance of 0.900
179                         int index = findIndex(sumDelta[t], t);    
180                         
181                         //the signifigance is the number of trees with the users score or higher 
182                         sumDeltaSig.push_back((iters-index)/(float)iters);
183
184                 }
185                 
186                 printSummaryFile();
187                 
188                 //clear out users groups
189                 globaldata->Groups.clear();
190                 
191                 return 0;
192         }
193         catch(exception& e) {
194                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
195                 exit(1);
196         }
197         catch(...) {
198                 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
199                 exit(1);
200         }       
201 }
202 //**********************************************************************************************************************
203 void LibShuffCommand::printCoverageFile() {
204         try {
205                 //format output
206                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
207                 
208                 //loop through each distance 
209                 for (int p = 0; p < cValues.size(); p++) {
210                         out << setprecision(6) << dist[p] << '\t';
211                         //print out coverage values
212                         for (int i = 0; i < numGroups; i++) {
213                                 for (int j = 0; j < numGroups; j++) {
214                                         out << cValues[p][i][j] << '\t';
215                                 }
216                         }
217                         
218                         for (int h = 0; h < deltaValues[p].size(); h++) {
219                                 out << deltaValues[p][h] << '\t';
220                         }
221                         
222                         out << endl;
223                 }
224                 
225         }
226         catch(exception& e) {
227                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
228                 exit(1);
229         }
230         catch(...) {
231                 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
232                 exit(1);
233         }       
234
235 //**********************************************************************************************************************
236 void LibShuffCommand::printSummaryFile() {
237         try {
238                 //format output
239                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
240                 
241                 for (int i = 0; i < numGroups; i++) {
242                         for (int j = 0; j < numGroups; j++) {
243                                 //don't output AA to AA
244                                 if (i != j) {
245                                         outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
246                                         cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
247                                 }
248                         }
249                 }
250                 outSum << endl;
251                 cout << endl;
252                 
253                 //print out delta values
254                 for (int i = 0; i < sumDelta.size(); i++) {
255                         if (sumDeltaSig[i] > (1/(float)iters)) {
256                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
257                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
258                         }else {
259                                 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
260                                 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
261                         }
262                 }
263                 
264                 outSum << endl;
265                 cout << endl;
266         
267         }
268         catch(exception& e) {
269                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
270                 exit(1);
271         }
272         catch(...) {
273                 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
274                 exit(1);
275         }       
276
277
278 //**********************************************************************************************************************
279 void LibShuffCommand::setGroups() {
280         try {
281                 //if the user has not entered specific groups to analyze then do them all
282                 if (globaldata->Groups.size() == 0) {
283                         numGroups = globaldata->gGroupmap->getNumGroups();
284                         for (int i=0; i < numGroups; i++) { 
285                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
286                         }
287                 }else {
288                         if (globaldata->getGroups() != "all") {
289                                 //check that groups are valid
290                                 for (int i = 0; i < globaldata->Groups.size(); i++) {
291                                         if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
292                                                 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
293                                                 // erase the invalid group from globaldata->Groups
294                                                 globaldata->Groups.erase (globaldata->Groups.begin()+i);
295                                         }
296                                 }
297                         
298                                 //if the user only entered invalid groups
299                                 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) { 
300                                         numGroups = globaldata->gGroupmap->getNumGroups();
301                                         for (int i=0; i < numGroups; i++) { 
302                                                 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
303                                         }
304                                         cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
305                                 }else { numGroups = globaldata->Groups.size(); }
306                         }else { //users wants all groups
307                                 numGroups = globaldata->gGroupmap->getNumGroups();
308                                 globaldata->Groups.clear();
309                                 for (int i=0; i < numGroups; i++) { 
310                                         globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
311                                 }
312                         }
313                 }
314                 
315                 //sort so labels match
316                 sort(globaldata->Groups.begin(), globaldata->Groups.end());
317                 
318                 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
319                 for (int i=0; i<numGroups; i++) { 
320                         for (int l = 0; l < numGroups; l++) {
321                                 //set group comparison labels
322                                 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
323                         }
324                 }
325         }
326         catch(exception& e) {
327                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
328                 exit(1);
329         }
330         catch(...) {
331                 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
332                 exit(1);
333         }
334 }
335 /***********************************************************/
336 int LibShuffCommand::findIndex(float score, int index) {
337         try{
338                 for (int i = 0; i < rsumDelta[index].size(); i++) {
339                         if (rsumDelta[index][i] >= score)       {       return i;       }
340                 }
341                 return rsumDelta[index].size();
342         }
343         catch(exception& e) {
344                 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }
347         catch(...) {
348                 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }
351 }
352
353 /***********************************************************/
354