X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=coverage.cpp;h=dea8dff84c4d50676a179f486bd4c805c73c3632;hb=39a9b207d1046f3409781c45e38b7a871133d224;hp=ba2167a10fbdfd321395446fb1273e67402ca48c;hpb=2d2fbc80f9359b19873ba3e63970b58f4f8f49f3;p=mothur.git diff --git a/coverage.cpp b/coverage.cpp index ba2167a..dea8dff 100644 --- a/coverage.cpp +++ b/coverage.cpp @@ -7,6 +7,12 @@ * */ +/* This class library coverage at the given distances of the Cramer-von Mises statistic. + you may refer to the "Integration of Microbial Ecology and Statistics: A Test To Compare Gene Libraries" + paper in Applied and Environmental Microbiology, Sept. 2004, p. 5485-5492 0099-2240/04/$8.00+0 + DOI: 10.1128/AEM.70.9.5485-5492.2004 Copyright 2004 American Society for Microbiology for more information. */ + + #include "coverage.h" //********************************************************************************************************************** @@ -15,60 +21,59 @@ Coverage::Coverage() { numUserGroups = globaldata->Groups.size(); numGroups = globaldata->gGroupmap->getNumGroups(); } + //********************************************************************************************************************** -void Coverage::getValues(FullMatrix* matrix, float d, vector< vector >& data) { +void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& data, vector dist) { try { vector min; vector groups; //initialize data - data.resize(numUserGroups); + data.resize(dist.size()); for (int l = 0; l < data.size(); l++) { - data[l].push_back(0.0); + data[l].resize(numGroups); + for (int k = 0; k < data[l].size(); k++) { + data[l][k].push_back(0.0); + } } /**************************************/ //get the minimums for each comparision /**************************************/ int count = 0; - int a = 0; - int b = 0; + for (int i = 0; i < numGroups; i++) { for (int j = 0; j < numGroups; j++) { - - //is this "box" one hte user wants analyzed? + + //is this "box" one the user wants analyzed? if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { - min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; - + min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; + //find the coverage at this distance sort(min.begin(), min.end()); -//cout << "minvector for : " << globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[j] << endl; -//for(int h = 0; h d) { index = m; break; } - } - // if you don't find one than all the mins are less than d - if (index == -1) { index = min.size(); } + //loop through each distance and fill data + for (int k = 0; k < data.size(); k++) { + + int index = -1; + //find index in min where value is higher than d + for (int m = 0; m < min.size(); m++) { + if (min[m] > dist[k]) { index = m; break; } + } - //save value in data - data[a][b] = 1.0 - ((min.size()-index)/(float)min.size()); -//cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl; - if (b < numUserGroups-1) { b++; } - else{ //you are moving to a new row of "boxes" - b = 0; - a++; + // if you don't find one than all the mins are less than d + if (index == -1) { index = min.size(); } + + //save value in data + data[k][i][j] = 1.0 - ((min.size()-index)/(float)min.size()); + } } count++; } } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -79,70 +84,93 @@ void Coverage::getValues(FullMatrix* matrix, float d, vector< vector >& d exit(1); } } + //********************************************************************************************************************** -//For the random matrices -void Coverage::getValues(FullMatrix* matrix, float d, vector< vector >& data, string r) { +void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& data, vector dist, string mode) { try { - vector min; + vector min1; + vector min2; vector groups; //initialize data - data.resize(numUserGroups); + data.resize(dist.size()); for (int l = 0; l < data.size(); l++) { - data[l].push_back(0.0); + data[l].resize(numGroups); + for (int k = 0; k < data[l].size(); k++) { + data[l][k].push_back(0.0); + } } - - /**************************************/ - //get the minimums for each comparision - /**************************************/ + int count = 0; - int a = 0; - int b = 0; + int count2 = 0; + + //for each box for (int i = 0; i < numGroups; i++) { for (int j = 0; j < numGroups; j++) { - - //is this "box" one hte user wants analyzed? - if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { -cout << "combo " << a << b << endl; -cout << "original matrix mins4rows " << endl; -matrix->printMinsForRows(cout); - //create random matrix for this comparison - matrix->shuffle(count); - - min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; -cout << "shuffled matrix mins4rows " << endl; -matrix->printMinsForRows(cout); + + if (i != j) { + //is this "box" one the user wants analyzed? + if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { + + matrix->shuffle(globaldata->gGroupmap->namesOfGroups[i], globaldata->gGroupmap->namesOfGroups[j]); + + min1 = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; + min2 = matrix->getMins(count2); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; - //find the coverage at this distance - sort(min.begin(), min.end()); + //find the coverage at this distance + sort(min1.begin(), min1.end()); - int index = -1; - //find index in min where value is higher than d - for (int m = 0; m < min.size(); m++) { - if (min[m] > d) { index = m; break; } - } + //find the coverage at this distance + sort(min2.begin(), min2.end()); + + float distDiff = 0; + + //loop through each distance and fill data + for (int k = 0; k < data.size(); k++) { + //****** coverage of AA **********// + int index = -1; + //find index in min where value is higher than d + for (int m = 0; m < min1.size(); m++) { + if (min1[m] > dist[k]) { index = m; break; } + } - // if you don't find one than all the mins are less than d - if (index == -1) { index = min.size(); } + // if you don't find one than all the mins are less than d + if (index == -1) { index = min1.size(); } + + //****** coverage of AB **********// + int index2 = -1; + //find index in min where value is higher than d + for (int m = 0; m < min2.size(); m++) { + if (min2[m] > dist[k]) { index2 = m; break; } + } - //save value in data - data[a][b] = 1.0 - ((min.size()-index)/(float)min.size()); -cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl; - if (b < numUserGroups-1) { b++; } - else{ //you are moving to a new row of "boxes" - b = 0; - a++; + // if you don't find one than all the mins are less than d + if (index2 == -1) { index2 = min2.size(); } + + //coverage of ii + float covII = 1.0 - ((min1.size()-index)/(float)min1.size()); + + //coverage of ij + float covIJ = 1.0 - ((min2.size()-index2)/(float)min2.size()); + + //save value in data (Caa - Cab)^2 * distDiff + data[k][i][j] = ((covII-covIJ) * (covII-covIJ)) * distDiff; + + //update distDiff + if (k < data.size() - 1) { + distDiff = dist[k+1] - dist[k]; + } + } + + //put matrix back to original + matrix->restore(); } } - count++; - - //restore matrix to original form for next shuffle - matrix->restore(); -min = matrix->getMins(count-1); -cout << "restored matrix mins4rows " << endl; -matrix->printMinsForRows(cout); + count2++; } + count += numGroups+1; //go from AA to BB to CC } + } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -152,7 +180,6 @@ matrix->printMinsForRows(cout); cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } + } -//********************************************************************************************************************** - \ No newline at end of file