From: westcott Date: Mon, 23 Mar 2009 17:23:57 +0000 (+0000) Subject: fixed libshuff signif. values X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=66f474db0824eebab0cebd53992728a0e5c789ca fixed libshuff signif. values --- diff --git a/commandfactory.cpp b/commandfactory.cpp index 729895d..7670d41 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -31,7 +31,7 @@ #include "unifracweightedcommand.h" #include "libshuffcommand.h" #include "mothur.h" -#include "nocommand.h" +#include "nocommands.h" /***********************************************************/ diff --git a/coverage.cpp b/coverage.cpp index 6b514fb..e0f26c9 100644 --- a/coverage.cpp +++ b/coverage.cpp @@ -23,7 +23,7 @@ Coverage::Coverage() { } //********************************************************************************************************************** -void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& data, vector dist, string mode) { +void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& data, vector dist) { try { vector min; vector groups; @@ -41,19 +41,13 @@ void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& //get the minimums for each comparision /**************************************/ int count = 0; - int a = 0; - int b = 0; + for (int i = 0; i < numGroups; i++) { for (int j = 0; j < numGroups; j++) { - //is this "box" one hte user wants analyzed? + //is this "box" one the user wants analyzed? if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { - if (mode == "random") { - //create random matrix for this comparison - matrix->shuffle(globaldata->gGroupmap->namesOfGroups[i], globaldata->gGroupmap->namesOfGroups[j]); - } - min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; //find the coverage at this distance @@ -72,25 +66,113 @@ void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& if (index == -1) { index = min.size(); } //save value in data - data[k][a][b] = 1.0 - ((min.size()-index)/(float)min.size()); + data[k][i][j] = 1.0 - ((min.size()-index)/(float)min.size()); } + } + count++; + } + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** +void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& data, vector dist, string mode) { + try { + vector min1; + vector min2; + vector groups; + + //initialize data + data.resize(dist.size()); + for (int l = 0; l < data.size(); l++) { + data[l].resize(numGroups); + for (int k = 0; k < data[l].size(); k++) { + data[l][k].push_back(0.0); + } + } + + /**************************************/ + //get the minimums for each comparision + /**************************************/ + int count = 0; + int count2 = 0; + + //for each box + for (int i = 0; i < numGroups; i++) { + for (int j = 0; j < numGroups; j++) { + + if (i != j) { + //is this "box" one the user wants analyzed? + if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { - //move to next box - if (b < numUserGroups-1) { b++; } - else{ //you are moving to a new row of "boxes" - b = 0; - a++; - } + matrix->shuffle(globaldata->gGroupmap->namesOfGroups[i], globaldata->gGroupmap->namesOfGroups[j]); + + min1 = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; + min2 = matrix->getMins(count2); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; - count++; + //find the coverage at this distance + sort(min1.begin(), min1.end()); + + //find the coverage at this distance + sort(min2.begin(), min2.end()); + + float distDiff = 0; + + //loop through each distance and fill data + for (int k = 0; k < data.size(); k++) { + //****** coverage of AA **********// + int index = -1; + //find index in min where value is higher than d + for (int m = 0; m < min1.size(); m++) { + if (min1[m] > dist[k]) { index = m; break; } + } - if (mode == "random") { - //restore matrix to original form for next shuffle + // if you don't find one than all the mins are less than d + if (index == -1) { index = min1.size(); } + + //****** coverage of AB **********// + int index2 = -1; + //find index in min where value is higher than d + for (int m = 0; m < min2.size(); m++) { + if (min2[m] > dist[k]) { index2 = m; break; } + } + + // if you don't find one than all the mins are less than d + if (index2 == -1) { index2 = min2.size(); } + + //coverage of ii + float covII = 1.0 - ((min1.size()-index)/(float)min1.size()); + + //coverage of ij + float covIJ = 1.0 - ((min2.size()-index2)/(float)min2.size()); + + //save value in data (Caa - Cab)^2 * distDiff + data[k][i][j] = ((covII-covIJ) * (covII-covIJ)) * distDiff; + + //update distDiff + if (k < data.size() - 1) { + distDiff = dist[k+1] - dist[k]; + } + } + + //put matrix back to original matrix->restore(); } } + + count2++; } + count += numGroups+1; //go from AA to BB to CC } } @@ -102,7 +184,6 @@ void Coverage::getValues(FullMatrix* matrix, vector< vector< vector > >& cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } + } -//********************************************************************************************************************** - \ No newline at end of file diff --git a/coverage.h b/coverage.h index 2f9fcfb..b4dccf7 100644 --- a/coverage.h +++ b/coverage.h @@ -23,7 +23,8 @@ class Coverage { public: Coverage(); ~Coverage(){}; - void getValues(FullMatrix*, vector< vector< vector > >&, vector, string); //matrix, container for results, vector of distances, mode + void getValues(FullMatrix*, vector< vector< vector > >&, vector, string); //matrix, container for results, vector of distances, mode - for random matrices + void getValues(FullMatrix*, vector< vector< vector > >&, vector); //for user matrix private: GlobalData* globaldata; diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index 4b18675..33a1a3b 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -103,11 +103,11 @@ int LibShuffCommand::execute(){ deltaValues.clear(); deltaValues.resize(dist.size()); - coverage->getValues(matrix, cValues, dist, "user"); + coverage->getValues(matrix, cValues, dist); - float distDiff = dist[0]; + float distDiff = 0; - //loop through each distance and load rsumdelta + //loop through each distance and load sumdelta for (int p = 0; p < cValues.size(); p++) { //find delta values int count = 0; @@ -115,16 +115,15 @@ int LibShuffCommand::execute(){ for (int j = 0; j < numGroups; j++) { //don't save AA to AA if (i != j) { - //(Caa - Cab)^2 - deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); + //(Caa - Cab)^2 * distDiff + deltaValues[p].push_back(((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); //* distDiff sumDelta[count] += deltaValues[p][count]; count++; } } } if (p < cValues.size() - 1) { - distDiff = dist[p+1] - dist[p]; -//cout << distDiff << endl; + distDiff = dist[p+1] - dist[p]; } } @@ -149,8 +148,6 @@ int LibShuffCommand::execute(){ coverage->getValues(matrix, cValues, dist, "random"); - distDiff = 1; - //loop through each distance and load rsumdelta for (int p = 0; p < cValues.size(); p++) { //find delta values @@ -159,16 +156,12 @@ int LibShuffCommand::execute(){ for (int j = 0; j < numGroups; j++) { //don't save AA to AA if (i != j) { - //(Caa - Cab)^2 - rsumDelta[count][m] += (((cValues[p][i][i]-cValues[p][i][j]) * (cValues[p][i][i]-cValues[p][i][j])) * distDiff); + //rsumDelta[3][500] = the sum of the delta scores for BB-BC for random matrix # 500. + rsumDelta[count][m] += cValues[p][i][j]; // where cValues[p][0][1] = delta value at distance p of AA-AB, cValues[p][1][2] = delta value at distance p of BB-BC. count++; } } } - - if (p < cValues.size() - 1) { - distDiff = dist[p+1] - dist[p]; - } } //clear out old Values @@ -331,6 +324,10 @@ void LibShuffCommand::setGroups() { //sort so labels match sort(globaldata->Groups.begin(), globaldata->Groups.end()); + //sort + sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end()); + + // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...; for (int i=0; i