]> git.donarmstrong.com Git - mothur.git/blobdiff - libshuffcommand.cpp
added sharedochiai and sharedanderberg calculators
[mothur.git] / libshuffcommand.cpp
index bd0166213bcf26dde642856b4004031d3480110a..33a1a3ba0c4bf35e1ad005396a48b2a9c5c93add 100644 (file)
@@ -7,6 +7,12 @@
  *
  */
 
+/* This class is designed to implement an integral form of the Cramer-von Mises statistic.
+       you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
+       paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
+       DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
+
+
 #include "libshuffcommand.h"
 
 //**********************************************************************************************************************
@@ -71,71 +77,58 @@ LibShuffCommand::~LibShuffCommand(){
 int LibShuffCommand::execute(){
        try {
                //deltaValues[0] = scores for the difference between AA and AB.
-               //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
-               vector<float> dist;
-               int next;
+               //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
+               Progress* reading;
+               reading = new Progress("Comparing to random:", iters);
                
                sumDelta.resize(numComp-numGroups, 0.0);
                
-               float D = 0.0;
-       
-               /*****************************/
-               //get values for users matrix
-               /*****************************/
                matrix->setBounds();
                
-               if (form != "discrete") { matrix->getDist(dist); next = 1; }
-//cout << "Distances" << endl;
-//for (int i = 0; i < dist.size(); i++) { cout << dist[i] << " "; }    
-//cout << endl;
+               //load distances
+               if (form != "discrete") { matrix->getDist(dist); }
+               else {
+                       float f = 0.0;
+                       while (f <= cutOff) {
+                               dist.push_back(f);
+                               f += step;
+                       }
+               }
        
+               /*****************************/
                //get values for users matrix
-               while (D <= cutOff) {
-                       //clear out old Values
-                       deltaValues.clear();                    
-                       coverage->getValues(matrix, D, cValues);
+               /*****************************/
                        
+               //clear out old Values
+               deltaValues.clear();
+               deltaValues.resize(dist.size());                        
+               
+               coverage->getValues(matrix, cValues, dist);
+               
+               float distDiff = 0;
+               
+               //loop through each distance and load sumdelta
+               for (int p = 0; p < cValues.size(); p++) {      
                        //find delta values
                        int count = 0;
                        for (int i = 0; i < numGroups; i++) {
                                for (int j = 0; j < numGroups; j++) {
                                        //don't save AA to AA
                                        if (i != j) {
-                                               //(Caa - Cab)^2
-                                               deltaValues.push_back( (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])) ); 
-                                               sumDelta[count] += deltaValues[count];
+                                               //(Caa - Cab)^2 * distDiff
+                                               deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); //* distDiff
+                                               sumDelta[count] += deltaValues[p][count];
                                                count++;
                                        }
                                }
                        }
-                       
-                       printCoverageFile(D);
-                       
-                       //check form
-                       if (form != "discrete") {   
-                               if (next == dist.size()) { break; }
-                               else {  D = dist[next];  next++;        }
-                       }else {  D += step;  }
-                       
-
-               }
-               
-               //output sum Deltas
-               for (int i = 0; i < numGroups; i++) {
-                       for (int j = 0; j < numGroups; j++) {
-                               //don't output AA to AA
-                               if (i != j) {
-                                       cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
-                               }
+                       if (p < cValues.size() - 1) {
+                               distDiff = dist[p+1] - dist[p]; 
                        }
                }
-               cout << endl;
-               
-               for (int i = 0; i < sumDelta.size(); i++) {
-                       cout << setprecision(6) << sumDelta[i] << '\t';
-               }
-               cout << endl;
-                               
+                       
+               printCoverageFile();
+                       
                /*******************************************************************************/
                //create and score random matrixes finding the sumDelta values for summary file
                /******************************************************************************/
@@ -152,45 +145,34 @@ int LibShuffCommand::execute(){
                for (int m = 0; m < iters; m++) {
                        //generate random matrix in getValues
                        //values for random matrix
-                       cout << "Iteration " << m+1 << endl;
-                       D = 0.0;
-                       next = 1;
-                       
-                       while (D <= cutOff) {
-                               coverage->getValues(matrix, D, cValues, "random");
+               
+                       coverage->getValues(matrix, cValues, dist, "random");
                        
+                       //loop through each distance and load rsumdelta
+                       for (int p = 0; p < cValues.size(); p++) {
                                //find delta values
                                int count = 0;
                                for (int i = 0; i < numGroups; i++) {
                                        for (int j = 0; j < numGroups; j++) {
                                                //don't save AA to AA
                                                if (i != j) {
-                                                       //(Caa - Cab)^2
-                                                       rsumDelta[count][m] += ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])));
-//cout << "iter " << m << " box " << i << j << " delta = " << ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))) << endl;
+                                                       //rsumDelta[3][500] = the sum of the delta scores for BB-BC for random matrix # 500.
+                                                       rsumDelta[count][m] += cValues[p][i][j];  // where cValues[p][0][1] = delta value at distance p of AA-AB, cValues[p][1][2] = delta value at distance p of BB-BC. 
                                                        count++;
                                                }
                                        }
                                }
-
-                               //check form
-                               if (form != "discrete") {   
-                                       if (next == dist.size()) { break; }
-                                       else {  D = dist[next];  next++;        }
-                               }else {  D += step;  }
-
-                       
-                               //clear out old Values
-                               cValues.clear();
                        }
-cout << "random sum delta for iter " << m << endl;
-for (int i = 0; i < rsumDelta.size(); i++) {
-       cout << setprecision(6) << rsumDelta[i][m] << '\t';
-}
-cout << endl;
 
+                       //clear out old Values
+                       reading->update(m);
+                       cValues.clear();
+                       
                }
                
+               reading->finish();
+               delete reading;
+                               
                /**********************************************************/
                //find the signifigance of the user matrix' sumdelta values
                /**********************************************************/
@@ -227,27 +209,28 @@ cout << endl;
        }       
 }
 //**********************************************************************************************************************
-void LibShuffCommand::printCoverageFile(float d) {
+void LibShuffCommand::printCoverageFile() {
        try {
                //format output
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
-               out << setprecision(6) << d << '\t';
-               
-               //print out coverage values
-               for (int i = 0; i < numGroups; i++) {
-                       for (int j = 0; j < numGroups; j++) {
-                               out << cValues[i][j] << '\t';
+               //loop through each distance 
+               for (int p = 0; p < cValues.size(); p++) {
+                       out << setprecision(6) << dist[p] << '\t';
+                       //print out coverage values
+                       for (int i = 0; i < numGroups; i++) {
+                               for (int j = 0; j < numGroups; j++) {
+                                       out << cValues[p][i][j] << '\t';
+                               }
                        }
+                       
+                       for (int h = 0; h < deltaValues[p].size(); h++) {
+                               out << deltaValues[p][h] << '\t';
+                       }
+                       
+                       out << endl;
                }
                
-               //print out delta values
-               for (int i = 0; i < deltaValues.size(); i++) {
-                       out << deltaValues[i] << '\t';
-               }
-               
-               out << endl;
-       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -278,8 +261,13 @@ void LibShuffCommand::printSummaryFile() {
                
                //print out delta values
                for (int i = 0; i < sumDelta.size(); i++) {
-                       outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
-                       cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                       if (sumDeltaSig[i] > (1/(float)iters)) {
+                               outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                               cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                       }else {
+                               outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+                               cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+                       }
                }
                
                outSum << endl;
@@ -336,6 +324,10 @@ void LibShuffCommand::setGroups() {
                //sort so labels match
                sort(globaldata->Groups.begin(), globaldata->Groups.end());
                
+               //sort
+               sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
+
+               
                // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
                for (int i=0; i<numGroups; i++) { 
                        for (int l = 0; l < numGroups; l++) {