X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trialSwap2.cpp;h=9959443c9ffbb0c411dfcd0fc6dd6920fbe6f5c8;hp=812077d16c98c5a3c865f197d1ba6f50fbce07a6;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=41a0537df3cb09cb8c725eab126fcf9bf28ebd8d diff --git a/trialSwap2.cpp b/trialSwap2.cpp index 812077d..9959443 100644 --- a/trialSwap2.cpp +++ b/trialSwap2.cpp @@ -4,531 +4,7 @@ //The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92. -/**************************************************************************************************/ -int TrialSwap2::sim1(vector > &co_matrix){ - try { - vector randRow; - vector > tmpmatrix; - int nrows = co_matrix.size(); - int ncols = co_matrix[0].size(); - - //clear co_matrix - // for(i=0;icontrol_pressed) { break; } - - for(int j=0;j 0.5) { - randRow.push_back(1); - }else{ - randRow.push_back(0); - } - } - tmpmatrix.push_back(randRow); - randRow.clear(); - //cout << endl; - } - co_matrix = tmpmatrix; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim1"); - exit(1); - } -} -/**************************************************************************************************/ -/* - *row sums fixed, columns equiprobable - */ -void TrialSwap2::sim2(vector > &co_matrix) -{ - try { - - for(int i=0;icontrol_pressed) { break; } - random_shuffle( co_matrix[i].begin(), co_matrix[i].end() ); - } - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim2"); - exit(1); - } -} -/**************************************************************************************************/ -int TrialSwap2::sim2plus(vector rowtotal, vector > &co_matrix) -{ - try { - int nrows = co_matrix.size(); - int ncols = co_matrix[0].size(); - double cellprob = 1.0/ncols; - vector cellprobvec; - vector tmprow; - vector > tmpmatrix; - //double randNum; - - double start = 0.0; - - for(int i=0; icontrol_pressed) { return 0; } - cellprobvec.push_back(start + cellprob); - start = cellprobvec[i]; - } - - for(int i=0; icontrol_pressed) { return 0; } - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= cellprobvec[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j cellprobvec[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - co_matrix = tmpmatrix; - tmpmatrix.clear(); - cellprobvec.clear(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim2plus"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition - * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose - * every null matrix before using an index and use the random matrix as a seed for the next null. - */ -/**************************************************************************************************/ -void TrialSwap2::sim3(vector > &initmatrix) -{ - try { - for(int i=0;icontrol_pressed) { break; } - random_shuffle( initmatrix[i].begin(), initmatrix[i].end() ); - } - - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim3"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim4(vector columntotal, vector rowtotal, vector > &co_matrix) -{ - try { - vector colProb; - vector tmprow;//(ncols, 7); - vector > tmpmatrix; - vector range; - vector randNums; - int ncols = columntotal.size(); - int nrows = rowtotal.size(); - tmprow.clear(); - - double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); - //cout << "col sum: " << colSum << endl; - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(columntotal[i]/colSum); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i]) - { - if (m->control_pressed) { return 0; } - - double randNum = rand() / double(RAND_MAX); - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - co_matrix = tmpmatrix; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim4"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim5(vector initcolumntotal,vector initrowtotal, vector > &initmatrix) -{ - try { - vector colProb; - vector tmprow;//(ncols, 7); - vector > tmpmatrix; - vector range; - vector randNums; - int ncols = initcolumntotal.size(); - int nrows = initrowtotal.size(); - - tmprow.clear(); - - double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 ); - //cout << "col sum: " << colSum << endl; - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(initcolumntotal[i]/colSum); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < initrowtotal[i]) - { - if (m->control_pressed) { return 0; } - - double randNum = rand() / double(RAND_MAX); - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - initmatrix = tmpmatrix; - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim5"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim6(vector columntotal, vector > &co_matrix) -{ - try { - vector > tmpmatrix; - vector colProb; - vector tmprow; - vector range; - int ncols = columntotal.size(); - int nrows = co_matrix.size(); - - int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); - - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(columntotal[i]/double (colSum)); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - tmprow.assign(ncols, 0); - int tmprowtotal; - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - while ( tmprowtotal > ncols) { - if (m->control_pressed) { return 0; } - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - } - //cout << tmprowtotal << endl; - //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) - { - if (m->control_pressed) { return 0; } - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - - - } - - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - co_matrix = tmpmatrix; - tmpmatrix.clear(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim6"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim7(vector initrowtotal, vector > &co_matrix) -{ - try { - vector > probmatrix; - vector > tmpmatrix; - vector colProb; - vector probrow; - vector tmprow; - vector range; - double nc; - int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); - - tmpmatrix.assign(nrows, vector(ncols, 0.)); - - int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 ); - - nc = rowsum * ncols; - //cout << nc << endl; - - //assign null matrix based on probabilities - - double start = 0.0; // don't reset start -- probs should be from 0-1 thoughout the entire matrix - - for(int i=0;icontrol_pressed) { return 0; } - //cout << initrowtotal[i]/double(nc) << endl; - double cellprob = initrowtotal[i]/double(nc); - //cout << cellprob << endl; - for(int j=0;jcontrol_pressed) { return 0; } - done: - //cout << k << endl; - //tmprowsum = accumulate( tmprowtotal.begin(), tmprowtotal.end(), 0 ); - double randNum = rand() / double(RAND_MAX); - //cout << randNum << "+" << endl; - //special case for the first entry - if(randNum <= probmatrix[0][0] && tmpmatrix[0][0] != 1) - { - tmpmatrix[0][0] = 1; - k++; - //cout << k << endl; - continue; - } - - - for(int i=0;icontrol_pressed) { return 0; } - for(int j=0;j probmatrix[i][j-1] && tmpmatrix[i][j] != 1) - { - tmpmatrix[i][j] = 1; - k++; - //cout << k << endl; - goto done; - } - //else - //k = k-1; - } - - } - - } - - co_matrix = tmpmatrix; - return 0; - //build probibility matrix - /* for(int i=0;i ncols) - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - //cout << tmprowtotal << endl; - //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) - { - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - } - } - - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - -/**************************************************************************************************/ -double TrialSwap2::calc_c_score (vector > &co_matrix, vector rowtotal, int ncols, int nrows) +double TrialSwap2::calc_c_score (vector > &co_matrix, vector rowtotal, int ncols, int nrows) { try { double cscore = 0.0; @@ -536,10 +12,10 @@ double TrialSwap2::calc_c_score (vector > &co_matrix, vector r double D; double normcscore = 0.0; int nonzeros = 0; - //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); vector > s; s.resize(nrows); for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0.0); }//only fill half the matrix - + for(int i=0;i > &co_matrix, vector r if(maxD != 0) { normcscore += D/maxD; - nonzeros++; - } + nonzeros++; + } } } - cscore = cscore/(double)(nrows*(nrows-1)/2); + //cscore = cscore/(double)(nrows*(nrows-1)/2); //not normalized //cout << "normalized c score: " << normcscore/nonzeros << endl; - + cscore = normcscore/(double)nonzeros; + return cscore; } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "calc_c_score"); - exit(1); - } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_c_score"); + exit(1); + } } /**************************************************************************************************/ -int TrialSwap2::calc_checker (vector > &co_matrix, vector rowtotal, int ncols, int nrows) +int TrialSwap2::calc_checker (vector > &co_matrix, vector rowtotal, int ncols, int nrows) { try { int cunits=0; //int s[nrows][ncols]; - //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); vector > s; s.resize(nrows); for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0); }//only fill half the matrix @@ -619,12 +96,12 @@ int TrialSwap2::calc_checker (vector > &co_matrix, vector rowt } } - return cunits; + return cunits; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_checker"); + exit(1); } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "calc_checker"); - exit(1); - } } /**************************************************************************************************/ double TrialSwap2::calc_vratio (int nrows, int ncols, vector rowtotal, vector columntotal) @@ -633,14 +110,14 @@ double TrialSwap2::calc_vratio (int nrows, int ncols, vector rowtotal, vect //int nrows = rowtotal.size(); //int ncols = columntotal.size(); int sumCol = accumulate(columntotal.begin(), columntotal.end(), 0 ); - // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 ); + // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 ); double colAvg = (double) sumCol / (double) ncols; - // double rowAvg = (double) sumRow / (double) nrows; + // double rowAvg = (double) sumRow / (double) nrows; double p = 0.0; - // double totalRowVar = 0.0; + // double totalRowVar = 0.0; double rowVar = 0.0; double colVar = 0.0; @@ -649,7 +126,7 @@ double TrialSwap2::calc_vratio (int nrows, int ncols, vector rowtotal, vect if (m->control_pressed) { return 0; } p = (double) rowtotal[i]/(double) ncols; rowVar += p * (1.0-p); - } + } for(int i=0;i rowtotal, vect m->errorOut(e, "TrialSwap2", "calc_vratio"); exit(1); } - + } /**************************************************************************************************/ int TrialSwap2::calc_combo (int nrows, int ncols, vector > &nullmatrix) { try { //need to transpose so we can compare rows (row-major order) - int tmpnrows = nrows; + //int tmpnrows = nrows; vector > tmpmatrix; - + vector tmprow; if(!tmpmatrix.empty()) tmpmatrix.clear(); for (int i=0;i > &nullmatr for(int i=j+1;i<=ncols;i++) { //comparing matrix rows - if( (tmpmatrix[j] == tmpmatrix[i])) + if( (tmpmatrix[j] == tmpmatrix[i])) { match++; break; } - } - + } + //on the last iteration of a previously matched row it will add itself because it doesn't match any following rows, so that combination is counted if (match == 0) unique++; - } - return unique; + } + return unique; } catch(exception& e) { m->errorOut(e, "TrialSwap2", "calc_combo"); exit(1); } -} +} /**************************************************************************************************/ -int TrialSwap2::swap_checkerboards (vector > &co_matrix) +int TrialSwap2::swap_checkerboards (vector > &co_matrix, int ncols, int nrows) { try { - int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); - int i, j, k, l; - i = m->getRandomIndex(nrows-1); - while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }} - k = m->getRandomIndex(ncols-1); - while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }} + //do 100 runs to make sure enough swaps are happening. This does NOT mean that there will be 1000 swaps, but that is the theoretical max. + for(int a=0;a<1000;a++){ + int i, j, k, l; + i = m->getRandomIndex(nrows-1); + while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }} + k = m->getRandomIndex(ncols-1); + while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }} - if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap - { - co_matrix[i][k]=1-co_matrix[i][k]; - co_matrix[i][l]=1-co_matrix[i][l]; - co_matrix[j][k]=1-co_matrix[j][k]; - co_matrix[j][l]=1-co_matrix[j][l]; + if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap + { + co_matrix[i][k]=1-co_matrix[i][k]; + co_matrix[i][l]=1-co_matrix[i][l]; + co_matrix[j][k]=1-co_matrix[j][k]; + co_matrix[j][l]=1-co_matrix[j][l]; + } } - + return 0; } catch(exception& e) { @@ -797,11 +276,11 @@ double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vecto m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine(); - m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine(); + m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine(); sampleSD = sqrt( (1/runs) * sum ); - m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine(); + m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine(); t = (nullMean - initialscore) / (sampleSD / sqrt(runs)); @@ -813,18 +292,46 @@ double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vecto } } /**************************************************************************************************/ +double TrialSwap2::getSD (int runs, vector scorevec, double nullMean) +{ + try{ + double sum = 0; + for(int i=0;icontrol_pressed) { return 0; } + sum += pow((scorevec[i] - nullMean),2); + } + return sqrt( (1/double(runs)) * sum ); + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "getSD"); + exit(1); + } +} +/**************************************************************************************************/ +double TrialSwap2::get_zscore (double sd, double nullMean, double initscore) +{ + try { + return (initscore - nullMean) / sd; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "get_zscore"); + exit(1); + } +} +/**************************************************************************************************/ int TrialSwap2::print_matrix(vector > &matrix, int nrows, int ncols) { try { - m->mothurOut("matrix:"); m->mothurOutEndLine(); + m->mothurOut("matrix:"); m->mothurOutEndLine(); for (int i = 0; i < nrows; i++) { if (m->control_pressed) { return 0; } for (int j = 0; j < ncols; j++) { - m->mothurOut(toString(matrix[i][j])); - } + m->mothurOut(toString(matrix[i][j])); + } m->mothurOutEndLine(); } return 0; @@ -840,4 +347,3 @@ int TrialSwap2::print_matrix(vector > &matrix, int nrows, int ncols) -