]> git.donarmstrong.com Git - mothur.git/blobdiff - coverage.cpp
added sharedochiai and sharedanderberg calculators
[mothur.git] / coverage.cpp
index ba2167a10fbdfd321395446fb1273e67402ca48c..dea8dff84c4d50676a179f486bd4c805c73c3632 100644 (file)
@@ -7,6 +7,12 @@
  *
  */
 
+/* This class library coverage at the given distances of the Cramer-von Mises statistic.
+       you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" 
+       paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0  
+       DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */
+       
+       
 #include "coverage.h"
 
 //**********************************************************************************************************************
@@ -15,60 +21,59 @@ Coverage::Coverage() {
                numUserGroups = globaldata->Groups.size();
                numGroups = globaldata->gGroupmap->getNumGroups();
 }
+
 //**********************************************************************************************************************
-void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data) {
+void Coverage::getValues(FullMatrix* matrix, vector< vector< vector<float> > >& data, vector<float> dist) {
        try {
                vector<float> min;
                vector<string> groups;
                
                //initialize data
-               data.resize(numUserGroups);
+               data.resize(dist.size());
                for (int l = 0; l < data.size(); l++) {
-                       data[l].push_back(0.0);
+                       data[l].resize(numGroups);
+                       for (int k = 0; k < data[l].size(); k++) {
+                               data[l][k].push_back(0.0);
+                       }
                }
 
                /**************************************/
                //get the minimums for each comparision
                /**************************************/
                int count = 0;
-               int a = 0;
-               int b = 0;
+               
                for (int i = 0; i < numGroups; i++) {
                        for (int j = 0; j < numGroups; j++) {
-                               
-                               //is this "box" one hte user wants analyzed?
+                       
+                               //is this "box" one the user wants analyzed?
                                if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
                                        
-                                       min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
-                                       
+                                       min = matrix->getMins(count);  //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
+
                                        //find the coverage at this distance
                                        sort(min.begin(), min.end());
-//cout << "minvector for : " << globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[j] << endl;
-//for(int h = 0; h<min.size(); h++) {
-// cout << min[h] << " ";
-//}
-//cout << endl;
-                                       int index = -1;
-                                       //find index in min where value is higher than d
-                                       for (int m = 0; m < min.size(); m++) {
-                                               if (min[m] > d) { index = m; break;     }
-                                       }
                                        
-                                       // if you don't find one than all the mins are less than d
-                                       if (index == -1) { index = min.size();  }
+                                       //loop through each distance and fill data
+                                       for (int k = 0; k < data.size(); k++) {
+                                       
+                                               int index = -1;
+                                               //find index in min where value is higher than d
+                                               for (int m = 0; m < min.size(); m++) {
+                                                       if (min[m] > dist[k])   { index = m; break;     }
+                                               }
                                        
-                                       //save value in data
-                                       data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
-//cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;             
-                                       if (b < numUserGroups-1) {  b++;  }
-                                       else{ //you are moving to a new row of "boxes"
-                                               b = 0;
-                                               a++;
+                                               // if you don't find one than all the mins are less than d
+                                               if (index == -1) { index = min.size();  }
+                                       
+                                               //save value in data
+                                               data[k][i][j] = 1.0 - ((min.size()-index)/(float)min.size());
+       
                                        }
                                }
                                count++;
                        }
                }
+               
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -79,70 +84,93 @@ void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& d
                exit(1);
        }       
 }
+
 //**********************************************************************************************************************
-//For the random matrices
-void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data, string r) {
+void Coverage::getValues(FullMatrix* matrix, vector< vector< vector<float> > >& data, vector<float> dist, string mode) {
        try {
-               vector<float> min;
+               vector<float> min1;
+               vector<float> min2;
                vector<string> groups;
                
                //initialize data
-               data.resize(numUserGroups);
+               data.resize(dist.size());
                for (int l = 0; l < data.size(); l++) {
-                       data[l].push_back(0.0);
+                       data[l].resize(numGroups);
+                       for (int k = 0; k < data[l].size(); k++) {
+                               data[l][k].push_back(0.0);
+                       }
                }
-
-               /**************************************/
-               //get the minimums for each comparision
-               /**************************************/
+               
                int count = 0;
-               int a = 0;
-               int b = 0;
+               int count2 = 0;
+               
+               //for each box
                for (int i = 0; i < numGroups; i++) {
                        for (int j = 0; j < numGroups; j++) {
-                       
-                               //is this "box" one hte user wants analyzed?
-                               if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
-cout << "combo " << a << b << endl;
-cout << "original matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);                                        
-                                       //create random matrix for this comparison
-                                       matrix->shuffle(count);
-                       
-                                       min = matrix->getMins(count);  //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
-cout << "shuffled matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);
+                               
+                               if (i != j) {
+                                       //is this "box" one the user wants analyzed?
+                                       if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
+                                       
+                                               matrix->shuffle(globaldata->gGroupmap->namesOfGroups[i], globaldata->gGroupmap->namesOfGroups[j]);
+                                       
+                                               min1 = matrix->getMins(count);  //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
+                                               min2 = matrix->getMins(count2);  //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
 
-                                       //find the coverage at this distance
-                                       sort(min.begin(), min.end());
+                                               //find the coverage at this distance
+                                               sort(min1.begin(), min1.end());
                                        
-                                       int index = -1;
-                                       //find index in min where value is higher than d
-                                       for (int m = 0; m < min.size(); m++) {
-                                               if (min[m] > d) { index = m; break;     }
-                                       }
+                                               //find the coverage at this distance
+                                               sort(min2.begin(), min2.end());
+                                       
+                                               float distDiff = 0;
+                                               
+                                               //loop through each distance and fill data
+                                               for (int k = 0; k < data.size(); k++) {
+                                                       //****** coverage of AA **********//
+                                                       int index = -1;
+                                                       //find index in min where value is higher than d
+                                                       for (int m = 0; m < min1.size(); m++) {
+                                                                       if (min1[m] > dist[k])  { index = m; break;     }
+                                                       }
                                        
-                                       // if you don't find one than all the mins are less than d
-                                       if (index == -1) { index = min.size();  }
+                                                       // if you don't find one than all the mins are less than d
+                                                       if (index == -1) { index = min1.size();  }
+                                                       
+                                                       //****** coverage of AB **********//
+                                                       int index2 = -1;
+                                                       //find index in min where value is higher than d
+                                                       for (int m = 0; m < min2.size(); m++) {
+                                                                       if (min2[m] > dist[k])  { index2 = m; break;    }
+                                                       }
                                        
-                                       //save value in data
-                                       data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
-cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;               
-                                       if (b < numUserGroups-1) {  b++;  }
-                                       else{ //you are moving to a new row of "boxes"
-                                               b = 0;
-                                               a++;
+                                                       // if you don't find one than all the mins are less than d
+                                                       if (index2 == -1) { index2 = min2.size();  }
+
+                                                       //coverage of ii
+                                                       float covII = 1.0 - ((min1.size()-index)/(float)min1.size());
+                                                       
+                                                       //coverage of ij
+                                                       float covIJ = 1.0 - ((min2.size()-index2)/(float)min2.size());
+                                                       
+                                                       //save value in data (Caa - Cab)^2 * distDiff
+                                                       data[k][i][j] = ((covII-covIJ) * (covII-covIJ)) * distDiff;
+                                                       
+                                                       //update distDiff
+                                                       if (k < data.size() - 1) {
+                                                               distDiff = dist[k+1] - dist[k]; 
+                                                       }
+                                               }
+                                       
+                                               //put matrix back to original
+                                               matrix->restore();
                                        }
                                }
-                               count++;
-                               
-                               //restore matrix to original form for next shuffle
-                               matrix->restore();
-min = matrix->getMins(count-1); 
-cout << "restored matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);
+                               count2++;
                        }
+                       count += numGroups+1; //go from AA to BB to CC
                }
+               
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -152,7 +180,6 @@ matrix->printMinsForRows(cout);
                cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }       
+
 }
-//**********************************************************************************************************************
 
-       
\ No newline at end of file