]> git.donarmstrong.com Git - mothur.git/blobdiff - libshuffcommand.cpp
documentation
[mothur.git] / libshuffcommand.cpp
index a34fb3602c2dc5b3b6ce6b9ec01ef2ed6f35eab3..a51b3270f855062a0e2ee92fab63779b38b50877 100644 (file)
@@ -7,6 +7,12 @@
  *
  */
 
+/* This class is designed to implement an integral form of the Cramer-von Mises statistic.
+       you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
+       paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
+       DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
+
+
 #include "libshuffcommand.h"
 
 //**********************************************************************************************************************
@@ -17,6 +23,8 @@ LibShuffCommand::LibShuffCommand(){
                globaldata = GlobalData::getInstance();
                convert(globaldata->getCutOff(), cutOff);
                convert(globaldata->getIters(), iters);
+               convert(globaldata->getStep(), step);
+               form = globaldata->getForm();
                matrix = globaldata->gMatrix;
                coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
                summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
@@ -69,21 +77,36 @@ LibShuffCommand::~LibShuffCommand(){
 int LibShuffCommand::execute(){
        try {
                //deltaValues[0] = scores for the difference between AA and AB.
-               //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
+               //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
+               Progress* reading;
+               reading = new Progress("Comparing to random:", iters);
                
                sumDelta.resize(numComp-numGroups, 0.0);
-                               
-               float D = 0.0;
                
+               matrix->setBounds();
+               
+               //load distances
+               if (form != "discrete") { matrix->getDist(dist); }
+               else {
+                       float f = 0.0;
+                       while (f <= cutOff) {
+                               dist.push_back(f);
+                               f += step;
+                       }
+               }
+       
                /*****************************/
                //get values for users matrix
                /*****************************/
-               matrix->getMinsForRowsVectors();
-               
-               //get values for users matrix
-               while (D <= cutOff) {
-                       cValues = coverage->getValues(matrix, D);
                        
+               //clear out old Values
+               deltaValues.clear();
+               deltaValues.resize(dist.size());                        
+               
+               coverage->getValues(matrix, cValues, dist, "user");
+               
+               //loop through each distance and load rsumdelta
+               for (int p = 0; p < cValues.size(); p++) {      
                        //find delta values
                        int count = 0;
                        for (int i = 0; i < numGroups; i++) {
@@ -91,22 +114,16 @@ int LibShuffCommand::execute(){
                                        //don't save AA to AA
                                        if (i != j) {
                                                //(Caa - Cab)^2
-                                               deltaValues.push_back( (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])) ); 
-                                               sumDelta[count] += deltaValues[count];
+                                               deltaValues[p].push_back((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j]));
+                                               sumDelta[count] += deltaValues[p][count];
                                                count++;
                                        }
                                }
                        }
+               }
                        
-                       D += 0.01;
-                       
-                       printCoverageFile(D);
+               printCoverageFile();
                        
-                       //clear out old Values
-                       cValues.clear();
-                       deltaValues.clear();
-               }
-               
                /*******************************************************************************/
                //create and score random matrixes finding the sumDelta values for summary file
                /******************************************************************************/
@@ -119,12 +136,15 @@ int LibShuffCommand::execute(){
                        }
                }
                
+               
                for (int m = 0; m < iters; m++) {
                        //generate random matrix in getValues
                        //values for random matrix
-                       while (D <= cutOff) {
-                               cValues = coverage->getValues(matrix, D);
+               
+                       coverage->getValues(matrix, cValues, dist, "random");
                        
+                       //loop through each distance and load rsumdelta
+                       for (int p = 0; p < cValues.size(); p++) {
                                //find delta values
                                int count = 0;
                                for (int i = 0; i < numGroups; i++) {
@@ -132,19 +152,23 @@ int LibShuffCommand::execute(){
                                                //don't save AA to AA
                                                if (i != j) {
                                                        //(Caa - Cab)^2
-                                                       rsumDelta[count][m] += (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]));
+                                                       rsumDelta[count][m] += ((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j]));
                                                        count++;
                                                }
                                        }
                                }
-                       
-                               D += 0.01;
-                       
-                               //clear out old Values
-                               cValues.clear();
+                               
                        }
+
+                       //clear out old Values
+                       reading->update(m);
+                       cValues.clear();
+                       
                }
                
+               reading->finish();
+               delete reading;
+                               
                /**********************************************************/
                //find the signifigance of the user matrix' sumdelta values
                /**********************************************************/
@@ -181,27 +205,28 @@ int LibShuffCommand::execute(){
        }       
 }
 //**********************************************************************************************************************
-void LibShuffCommand::printCoverageFile(float d) {
+void LibShuffCommand::printCoverageFile() {
        try {
                //format output
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
-               out << setprecision(globaldata->getIters().length()) << d << '\t';
-               
-               //print out coverage values
-               for (int i = 0; i < numGroups; i++) {
-                       for (int j = 0; j < numGroups; j++) {
-                               out << cValues[i][j] << '\t';
+               //loop through each distance 
+               for (int p = 0; p < cValues.size(); p++) {
+                       out << setprecision(6) << dist[p] << '\t';
+                       //print out coverage values
+                       for (int i = 0; i < numGroups; i++) {
+                               for (int j = 0; j < numGroups; j++) {
+                                       out << cValues[p][i][j] << '\t';
+                               }
                        }
+                       
+                       for (int h = 0; h < deltaValues[p].size(); h++) {
+                               out << deltaValues[p][h] << '\t';
+                       }
+                       
+                       out << endl;
                }
                
-               //print out delta values
-               for (int i = 0; i < deltaValues.size(); i++) {
-                       out << deltaValues[i] << '\t';
-               }
-               
-               out << endl;
-       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -222,18 +247,27 @@ void LibShuffCommand::printSummaryFile() {
                        for (int j = 0; j < numGroups; j++) {
                                //don't output AA to AA
                                if (i != j) {
-                                       outSum << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+                                       outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+                                       cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
                                }
                        }
                }
                outSum << endl;
+               cout << endl;
                
                //print out delta values
                for (int i = 0; i < sumDelta.size(); i++) {
-                       outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                       if (sumDeltaSig[i] > (1/(float)iters)) {
+                               outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                               cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                       }else {
+                               outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+                               cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+                       }
                }
                
                outSum << endl;
+               cout << endl;
        
        }
        catch(exception& e) {
@@ -283,6 +317,9 @@ void LibShuffCommand::setGroups() {
                        }
                }
                
+               //sort so labels match
+               sort(globaldata->Groups.begin(), globaldata->Groups.end());
+               
                // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
                for (int i=0; i<numGroups; i++) { 
                        for (int l = 0; l < numGroups; l++) {