X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trialSwap2.cpp;h=9959443c9ffbb0c411dfcd0fc6dd6920fbe6f5c8;hp=c580436b3b3d542150502d2b771f426bc4aed8a7;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=91a27e0483827c06c21c4fe89558923bbfe86573 diff --git a/trialSwap2.cpp b/trialSwap2.cpp index c580436..9959443 100644 --- a/trialSwap2.cpp +++ b/trialSwap2.cpp @@ -4,647 +4,7 @@ //The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92. -/************************************************************************************************** -int TrialSwap2::intrand(int n){ - try { - double z; - - z = (double)random() * (double)n / (double)RAND_MAX; - if(z>=n) - z=n-1; - if(z<0) - z=0; - return((int)floor(z)); - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "intrand"); - exit(1); - } -} -/**************************************************************************************************/ -/* completely random matrix, all column and row totals are variable, matrix size is the same - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim1(vector > &co_matrix){ - try { - vector randRow; - vector > tmpmatrix; - int nrows = co_matrix.size(); - int ncols = co_matrix[0].size(); - - //clear co_matrix - // for(i=0;icontrol_pressed) { break; } - - for(int j=0;j 0.5) { - randRow.push_back(1); - }else{ - randRow.push_back(0); - } - } - tmpmatrix.push_back(randRow); - randRow.clear(); - //cout << endl; - } - co_matrix = tmpmatrix; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim1"); - exit(1); - } -} -/**************************************************************************************************/ -/* - *row sums fixed, columns equiprobable - */ -void TrialSwap2::sim2(vector > &co_matrix) -{ - try { - - for(int i=0;icontrol_pressed) { break; } - random_shuffle( co_matrix[i].begin(), co_matrix[i].end() ); - } - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim2"); - exit(1); - } -} -/**************************************************************************************************/ -int TrialSwap2::sim2plus(vector rowtotal, vector > &co_matrix) -{ - try { - int nrows = co_matrix.size(); - int ncols = co_matrix[0].size(); - double cellprob = 1.0/ncols; - vector cellprobvec; - vector tmprow; - vector > tmpmatrix; - //double randNum; - - double start = 0.0; - - for(int i=0; icontrol_pressed) { return 0; } - cellprobvec.push_back(start + cellprob); - start = cellprobvec[i]; - } - - for(int i=0; icontrol_pressed) { return 0; } - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= cellprobvec[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j cellprobvec[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - co_matrix = tmpmatrix; - tmpmatrix.clear(); - cellprobvec.clear(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim2plus"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition - * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose - * every null matrix before using an index and use the random matrix as a seed for the next null. - */ -/**************************************************************************************************/ -void TrialSwap2::sim3(vector > &initmatrix) -{ - try { - for(int i=0;icontrol_pressed) { break; } - random_shuffle( initmatrix[i].begin(), initmatrix[i].end() ); - } - - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim3"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim4(vector columntotal, vector rowtotal, vector > &co_matrix) -{ - try { - vector colProb; - vector tmprow;//(ncols, 7); - vector > tmpmatrix; - vector range; - vector randNums; - int ncols = columntotal.size(); - int nrows = rowtotal.size(); - tmprow.clear(); - - double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); - //cout << "col sum: " << colSum << endl; - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(columntotal[i]/colSum); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i]) - { - if (m->control_pressed) { return 0; } - - double randNum = rand() / double(RAND_MAX); - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - co_matrix = tmpmatrix; - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim4"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim5(vector initcolumntotal,vector initrowtotal, vector > &initmatrix) -{ - try { - vector colProb; - vector tmprow;//(ncols, 7); - vector > tmpmatrix; - vector range; - vector randNums; - int ncols = initcolumntotal.size(); - int nrows = initrowtotal.size(); - - tmprow.clear(); - - double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 ); - //cout << "col sum: " << colSum << endl; - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(initcolumntotal[i]/colSum); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < initrowtotal[i]) - { - if (m->control_pressed) { return 0; } - - double randNum = rand() / double(RAND_MAX); - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - } - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - initmatrix = tmpmatrix; - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim5"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim6(vector columntotal, vector > &co_matrix) -{ - try { - vector > tmpmatrix; - vector colProb; - vector tmprow; - vector range; - int ncols = columntotal.size(); - int nrows = co_matrix.size(); - - int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); - - for(int i=0;icontrol_pressed) { return 0; } - colProb.push_back(columntotal[i]/double (colSum)); - } - - double start = 0.0; - - for(int i=0;icontrol_pressed) { return 0; } - range.push_back(start + colProb[i]); - start = range[i]; - } - - for(int i=0;icontrol_pressed) { return 0; } - tmprow.assign(ncols, 0); - int tmprowtotal; - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - while ( tmprowtotal > ncols) { - if (m->control_pressed) { return 0; } - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - } - //cout << tmprowtotal << endl; - //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) - { - if (m->control_pressed) { return 0; } - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - - } - - - } - - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - co_matrix = tmpmatrix; - tmpmatrix.clear(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim6"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim7(vector initrowtotal, vector > &co_matrix) -{ - try { - vector > probmatrix; - vector > tmpmatrix; - vector colProb; - vector probrow; - vector tmprow; - vector range; - double nc; - int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); - - tmpmatrix.assign(nrows, vector(ncols, 0.)); - - int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 ); - - nc = rowsum * ncols; - //cout << nc << endl; - - //assign null matrix based on probabilities - - double start = 0.0; // don't reset start -- probs should be from 0-1 thoughout the entire matrix - - for(int i=0;icontrol_pressed) { return 0; } - //cout << initrowtotal[i]/double(nc) << endl; - double cellprob = initrowtotal[i]/double(nc); - //cout << cellprob << endl; - for(int j=0;jcontrol_pressed) { return 0; } - done: - //cout << k << endl; - //tmprowsum = accumulate( tmprowtotal.begin(), tmprowtotal.end(), 0 ); - double randNum = rand() / double(RAND_MAX); - //cout << randNum << "+" << endl; - //special case for the first entry - if(randNum <= probmatrix[0][0] && tmpmatrix[0][0] != 1) - { - tmpmatrix[0][0] = 1; - k++; - //cout << k << endl; - continue; - } - - - for(int i=0;icontrol_pressed) { return 0; } - for(int j=0;j probmatrix[i][j-1] && tmpmatrix[i][j] != 1) - { - tmpmatrix[i][j] = 1; - k++; - //cout << k << endl; - goto done; - } - //else - //k = k-1; - } - - } - - } - - co_matrix = tmpmatrix; - return 0; - //build probibility matrix - /* for(int i=0;i ncols) - tmprowtotal = (rand() / double (RAND_MAX)) * 10; - //cout << tmprowtotal << endl; - //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl; - - while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal) - { - double randNum = rand() / double(RAND_MAX); - //cout << randNum << endl; - if(randNum <= range[0]) - { - tmprow[0] = 1; - continue; - } - for(int j=1;j range[j-1] && tmprow[j] != 1) - { - tmprow[j] = 1; - } - } - } - - tmpmatrix.push_back(tmprow); - tmprow.clear(); - } - - initmatrix = tmpmatrix; - */ - } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "sim7"); - exit(1); - } -} -/**************************************************************************************************/ -/* - * - * - * - */ -/**************************************************************************************************/ -int TrialSwap2::sim8(vector columntotal, vector rowtotal, vector > &co_matrix) -{ - try { - double prob; - double start = 0.0; - int ncols = columntotal.size(); int nrows = rowtotal.size(); - double probarray[nrows * ncols]; - double randnum; - int grandtotal; - int total = 0; - - //double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 ); - double rowSum = accumulate( rowtotal.begin(), rowtotal.end(), 0 ); - - if (m->control_pressed) { return 0; } - - //cout << "rowsum: " << rowSum << endl; - - grandtotal = rowSum; - - //create probability matrix with each site being between 0 and 1 - for (int i=0;icontrol_pressed) { return 0; } - for (int j=0;jcontrol_pressed) { return 0; } - randnum = rand() / double(RAND_MAX); - //cout << "rand num: " << randnum << endl; - if((randnum <= probarray[0]) && (probarray[0] != 2) ) { - probarray[0] = 2; - total++; - continue; - } - for(int i=1;i<(nrows*ncols);i++) { - if (m->control_pressed) { return 0; } - if((randnum <= probarray[i]) && (randnum > probarray[i-1]) && (probarray[i] != 2) ) { - probarray[i] = 2; - total++; - break; - } - else - continue; - } - } - //cout << "prbarray" << endl; - //for(int i=0;i<(nrows*ncols);i++) - //cout << probarray[i] << " "; - //cout << endl; - for(int i=0;icontrol_pressed) { return 0; } - for(int j=0;jerrorOut(e, "TrialSwap2", "sim8"); - exit(1); - } -} -/**************************************************************************************************/ -double TrialSwap2::calc_c_score (vector > &co_matrix,vector rowtotal) +double TrialSwap2::calc_c_score (vector > &co_matrix, vector rowtotal, int ncols, int nrows) { try { double cscore = 0.0; @@ -652,10 +12,10 @@ double TrialSwap2::calc_c_score (vector > &co_matrix,vector ro double D; double normcscore = 0.0; int nonzeros = 0; - int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); vector > s; s.resize(nrows); for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0.0); }//only fill half the matrix - + for(int i=0;i > &co_matrix,vector ro if(maxD != 0) { normcscore += D/maxD; - nonzeros++; - } + nonzeros++; + } } } - cscore = cscore/(double)(nrows*(nrows-1)/2); + //cscore = cscore/(double)(nrows*(nrows-1)/2); //not normalized //cout << "normalized c score: " << normcscore/nonzeros << endl; - + cscore = normcscore/(double)nonzeros; + return cscore; } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "calc_c_score"); - exit(1); - } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_c_score"); + exit(1); + } } /**************************************************************************************************/ -int TrialSwap2::calc_checker (vector > &co_matrix, vector rowtotal) +int TrialSwap2::calc_checker (vector > &co_matrix, vector rowtotal, int ncols, int nrows) { try { int cunits=0; //int s[nrows][ncols]; - int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); + //int ncols = co_matrix[0].size(); int nrows = rowtotal.size(); vector > s; s.resize(nrows); for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0); }//only fill half the matrix @@ -735,28 +96,28 @@ int TrialSwap2::calc_checker (vector > &co_matrix, vector rowt } } - return cunits; + return cunits; + } + catch(exception& e) { + m->errorOut(e, "TrialSwap2", "calc_checker"); + exit(1); } - catch(exception& e) { - m->errorOut(e, "TrialSwap2", "calc_checker"); - exit(1); - } } /**************************************************************************************************/ -double TrialSwap2::calc_vratio (vector rowtotal, vector columntotal) +double TrialSwap2::calc_vratio (int nrows, int ncols, vector rowtotal, vector columntotal) { try { - int nrows = rowtotal.size(); - int ncols = columntotal.size(); + //int nrows = rowtotal.size(); + //int ncols = columntotal.size(); int sumCol = accumulate(columntotal.begin(), columntotal.end(), 0 ); - // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 ); + // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 ); double colAvg = (double) sumCol / (double) ncols; - // double rowAvg = (double) sumRow / (double) nrows; + // double rowAvg = (double) sumRow / (double) nrows; double p = 0.0; - // double totalRowVar = 0.0; + // double totalRowVar = 0.0; double rowVar = 0.0; double colVar = 0.0; @@ -765,7 +126,7 @@ double TrialSwap2::calc_vratio (vector rowtotal, vector columntotal) if (m->control_pressed) { return 0; } p = (double) rowtotal[i]/(double) ncols; rowVar += p * (1.0-p); - } + } for(int i=0;i rowtotal, vector columntotal) m->errorOut(e, "TrialSwap2", "calc_vratio"); exit(1); } - + } /**************************************************************************************************/ -int TrialSwap2::calc_combo (vector > &initmatrix) +int TrialSwap2::calc_combo (int nrows, int ncols, vector > &nullmatrix) { try { - int initrows = initmatrix.size(); + //need to transpose so we can compare rows (row-major order) + //int tmpnrows = nrows; + vector > tmpmatrix; + + vector tmprow; + if(!tmpmatrix.empty()) + tmpmatrix.clear(); + for (int i=0;icontrol_pressed) { return 0; } - if( (initmatrix[i] == initmatrix[j])) + //comparing matrix rows + if( (tmpmatrix[j] == tmpmatrix[i])) { match++; - matches++; break; } - } + } //on the last iteration of a previously matched row it will add itself because it doesn't match any following rows, so that combination is counted if (match == 0) @@ -815,31 +191,29 @@ int TrialSwap2::calc_combo (vector > &initmatrix) m->errorOut(e, "TrialSwap2", "calc_combo"); exit(1); } -} +} /**************************************************************************************************/ -int TrialSwap2::swap_checkerboards (vector > &co_matrix) +int TrialSwap2::swap_checkerboards (vector > &co_matrix, int ncols, int nrows) { try { - int ncols = co_matrix[0].size(); int nrows = co_matrix.size(); - int i, j, k, l; - i = m->getRandomIndex(nrows-1); - while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }} - k = m->getRandomIndex(ncols-1); - while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }} - - //cout << co_matrix[i][k] << " " << co_matrix[j][l] << endl; - //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; - //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; - //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl; - if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap - { - co_matrix[i][k]=1-co_matrix[i][k]; - co_matrix[i][l]=1-co_matrix[i][l]; - co_matrix[j][k]=1-co_matrix[j][k]; - co_matrix[j][l]=1-co_matrix[j][l]; - //cout << "swapped!" << endl; + //do 100 runs to make sure enough swaps are happening. This does NOT mean that there will be 1000 swaps, but that is the theoretical max. + for(int a=0;a<1000;a++){ + int i, j, k, l; + i = m->getRandomIndex(nrows-1); + while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }} + k = m->getRandomIndex(ncols-1); + while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }} + + if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap + { + co_matrix[i][k]=1-co_matrix[i][k]; + co_matrix[i][l]=1-co_matrix[i][l]; + co_matrix[j][k]=1-co_matrix[j][k]; + co_matrix[j][l]=1-co_matrix[j][l]; + + } } - //cout << "i: " << i << " j: " << j << " k: " << " l: " << l << endl; + return 0; } catch(exception& e) { @@ -902,11 +276,11 @@ double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vecto m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine(); - m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine(); + m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine(); sampleSD = sqrt( (1/runs) * sum ); - m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine(); + m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine(); t = (nullMean - initialscore) / (sampleSD / sqrt(runs)); @@ -918,101 +292,52 @@ double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vecto } } /**************************************************************************************************/ -int TrialSwap2::print_matrix(vector > &matrix, int nrows, int ncols) +double TrialSwap2::getSD (int runs, vector scorevec, double nullMean) { - try { - m->mothurOut("matrix:"); m->mothurOutEndLine(); - - for (int i = 0; i < nrows; i++) - { - if (m->control_pressed) { return 0; } - for (int j = 0; j < ncols; j++) + try{ + double sum = 0; + for(int i=0;imothurOut(toString(matrix[i][j])); - } - m->mothurOutEndLine(); - } - return 0; + if (m->control_pressed) { return 0; } + sum += pow((scorevec[i] - nullMean),2); + } + return sqrt( (1/double(runs)) * sum ); } catch(exception& e) { - m->errorOut(e, "TrialSwap2", "print_matrix"); + m->errorOut(e, "TrialSwap2", "getSD"); exit(1); } } /**************************************************************************************************/ -int TrialSwap2::transpose_matrix (vector > &initmatrix, vector > &co_matrix)//, int nrows, int nocols) -{ +double TrialSwap2::get_zscore (double sd, double nullMean, double initscore) +{ try { - int ncols = initmatrix.size(); int nrows = initmatrix[0].size(); - int tmpnrows = nrows; - //vector > tmpvec; - vector tmprow; - if(!co_matrix.empty()) - co_matrix.clear(); - for (int i=0;icontrol_pressed) { return 0; } - for (int j=0;jerrorOut(e, "TrialSwap2", "transpose_matrix"); + m->errorOut(e, "TrialSwap2", "get_zscore"); exit(1); } } /**************************************************************************************************/ -int TrialSwap2::update_row_col_totals(vector > &co_matrix, vector &rowtotal, vector &columntotal) +int TrialSwap2::print_matrix(vector > &matrix, int nrows, int ncols) { try { - //rowtotal.clear(); - //columntotal.clear(); - //generate (rowtotal.begin(), rowtotal.end(), 0); - //generate (columntotal.begin(), columntotal.end(), 0); - int nrows = co_matrix.size(); - int ncols = co_matrix[0].size(); - vector tmpcolumntotal; tmpcolumntotal.resize(ncols, 0); - vector tmprowtotal; tmprowtotal.resize(nrows, 0); - - int rowcount = 0; + m->mothurOut("matrix:"); m->mothurOutEndLine(); for (int i = 0; i < nrows; i++) { if (m->control_pressed) { return 0; } for (int j = 0; j < ncols; j++) { - if (co_matrix[i][j] == 1) - { - rowcount++; - tmpcolumntotal[j]++; - } - } - tmprowtotal[i] = rowcount; - rowcount = 0; + m->mothurOut(toString(matrix[i][j])); + } + m->mothurOutEndLine(); } - columntotal = tmpcolumntotal; - rowtotal = tmprowtotal; - /*cout << "rowtotal: "; - for(int i = 0; ierrorOut(e, "TrialSwap2", "update_row_col_totals"); + m->errorOut(e, "TrialSwap2", "print_matrix"); exit(1); } }