5 * Created by Sarah Westcott on 3/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 /* This class is designed to implement an integral form of the Cramer-von Mises statistic.
11 you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries"
12 paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0
13 DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
16 #include "libshuffcommand.h"
18 //**********************************************************************************************************************
21 LibShuffCommand::LibShuffCommand(){
23 globaldata = GlobalData::getInstance();
24 convert(globaldata->getCutOff(), cutOff);
25 convert(globaldata->getIters(), iters);
26 convert(globaldata->getStep(), step);
27 form = globaldata->getForm();
28 matrix = globaldata->gMatrix;
29 coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
30 summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
31 openOutputFile(coverageFile, out);
32 openOutputFile(summaryFile, outSum);
34 //set the groups to be analyzed
37 //file headers for coverage file
39 for (int i = 0; i < groupComb.size(); i++) {
40 out << "C" + groupComb[i] << '\t';
43 for (int i = 0; i < numGroups; i++) {
44 for (int j = 0; j < numGroups; j++) {
45 //don't output AA to AA
47 out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
53 numComp = numGroups*numGroups;
55 coverage = new Coverage();
59 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
63 cout << "An unknown error has occurred in the LibShuffCommand class function LibShuffCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69 //**********************************************************************************************************************
71 LibShuffCommand::~LibShuffCommand(){
75 //**********************************************************************************************************************
77 int LibShuffCommand::execute(){
79 //deltaValues[0] = scores for the difference between AA and AB.
80 //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
82 reading = new Progress("Comparing to random:", iters);
84 sumDelta.resize(numComp-numGroups, 0.0);
89 if (form != "discrete") { matrix->getDist(dist); }
98 /*****************************/
99 //get values for users matrix
100 /*****************************/
102 //clear out old Values
104 deltaValues.resize(dist.size());
106 coverage->getValues(matrix, cValues, dist);
110 //loop through each distance and load sumdelta
111 for (int p = 0; p < cValues.size(); p++) {
114 for (int i = 0; i < numGroups; i++) {
115 for (int j = 0; j < numGroups; j++) {
116 //don't save AA to AA
118 //(Caa - Cab)^2 * distDiff
119 deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); //* distDiff
120 sumDelta[count] += deltaValues[p][count];
125 if (p < cValues.size() - 1) {
126 distDiff = dist[p+1] - dist[p];
132 /*******************************************************************************/
133 //create and score random matrixes finding the sumDelta values for summary file
134 /******************************************************************************/
136 //initialize rsumDelta
137 rsumDelta.resize(numComp-numGroups);
138 for (int l = 0; l < rsumDelta.size(); l++) {
139 for (int w = 0; w < iters; w++) {
140 rsumDelta[l].push_back(0.0);
145 for (int m = 0; m < iters; m++) {
146 //generate random matrix in getValues
147 //values for random matrix
149 coverage->getValues(matrix, cValues, dist, "random");
151 //loop through each distance and load rsumdelta
152 for (int p = 0; p < cValues.size(); p++) {
155 for (int i = 0; i < numGroups; i++) {
156 for (int j = 0; j < numGroups; j++) {
157 //don't save AA to AA
159 //rsumDelta[3][500] = the sum of the delta scores for BB-BC for random matrix # 500.
160 rsumDelta[count][m] += cValues[p][i][j]; // where cValues[p][0][1] = delta value at distance p of AA-AB, cValues[p][1][2] = delta value at distance p of BB-BC.
167 //clear out old Values
176 /**********************************************************/
177 //find the signifigance of the user matrix' sumdelta values
178 /**********************************************************/
180 for (int t = 0; t < rsumDelta.size(); t++) {
182 sort(rsumDelta[t].begin(), rsumDelta[t].end());
184 //the index of the score higher than yours is returned
185 //so if you have 1000 random matrices the index returned is 100
186 //then there are 900 matrices with a score greater then you.
187 //giving you a signifigance of 0.900
188 int index = findIndex(sumDelta[t], t);
190 //the signifigance is the number of trees with the users score or higher
191 sumDeltaSig.push_back((iters-index)/(float)iters);
197 //clear out users groups
198 globaldata->Groups.clear();
202 catch(exception& e) {
203 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
207 cout << "An unknown error has occurred in the LibShuffCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
211 //**********************************************************************************************************************
212 void LibShuffCommand::printCoverageFile() {
215 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
217 //loop through each distance
218 for (int p = 0; p < cValues.size(); p++) {
219 out << setprecision(6) << dist[p] << '\t';
220 //print out coverage values
221 for (int i = 0; i < numGroups; i++) {
222 for (int j = 0; j < numGroups; j++) {
223 out << cValues[p][i][j] << '\t';
227 for (int h = 0; h < deltaValues[p].size(); h++) {
228 out << deltaValues[p][h] << '\t';
235 catch(exception& e) {
236 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
240 cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
244 //**********************************************************************************************************************
245 void LibShuffCommand::printSummaryFile() {
248 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
250 for (int i = 0; i < numGroups; i++) {
251 for (int j = 0; j < numGroups; j++) {
252 //don't output AA to AA
254 outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
255 cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
262 //print out delta values
263 for (int i = 0; i < sumDelta.size(); i++) {
264 if (sumDeltaSig[i] > (1/(float)iters)) {
265 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
266 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
268 outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
269 cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
277 catch(exception& e) {
278 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282 cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287 //**********************************************************************************************************************
288 void LibShuffCommand::setGroups() {
290 //if the user has not entered specific groups to analyze then do them all
291 if (globaldata->Groups.size() == 0) {
292 numGroups = globaldata->gGroupmap->getNumGroups();
293 for (int i=0; i < numGroups; i++) {
294 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
297 if (globaldata->getGroups() != "all") {
298 //check that groups are valid
299 for (int i = 0; i < globaldata->Groups.size(); i++) {
300 if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
301 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
302 // erase the invalid group from globaldata->Groups
303 globaldata->Groups.erase (globaldata->Groups.begin()+i);
307 //if the user only entered invalid groups
308 if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) {
309 numGroups = globaldata->gGroupmap->getNumGroups();
310 for (int i=0; i < numGroups; i++) {
311 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
313 cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
314 }else { numGroups = globaldata->Groups.size(); }
315 }else { //users wants all groups
316 numGroups = globaldata->gGroupmap->getNumGroups();
317 globaldata->Groups.clear();
318 for (int i=0; i < numGroups; i++) {
319 globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
324 //sort so labels match
325 sort(globaldata->Groups.begin(), globaldata->Groups.end());
328 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
331 // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
332 for (int i=0; i<numGroups; i++) {
333 for (int l = 0; l < numGroups; l++) {
334 //set group comparison labels
335 groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
339 catch(exception& e) {
340 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
344 cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
348 /***********************************************************/
349 int LibShuffCommand::findIndex(float score, int index) {
351 for (int i = 0; i < rsumDelta[index].size(); i++) {
352 if (rsumDelta[index][i] >= score) { return i; }
354 return rsumDelta[index].size();
356 catch(exception& e) {
357 cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
361 cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
366 /***********************************************************/