1 #include "trialswap2.h"
4 //The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92.
7 /**************************************************************************************************/
8 int TrialSwap2::sim1(vector<vector<int> > &co_matrix){
11 vector<vector<int> > tmpmatrix;
12 int nrows = co_matrix.size();
13 int ncols = co_matrix[0].size();
16 // for(i=0;i<nrows;i++)
21 //cout << "building matrix" << endl;
22 for(int i=0;i<nrows;i++){
23 if (m->control_pressed) { break; }
25 for(int j=0;j<ncols;j++){
26 double randNum = rand() / double(RAND_MAX);
27 //cout << randNum << endl;
35 tmpmatrix.push_back(randRow);
39 co_matrix = tmpmatrix;
44 m->errorOut(e, "TrialSwap2", "sim1");
48 /**************************************************************************************************/
50 *row sums fixed, columns equiprobable
52 void TrialSwap2::sim2(vector<vector<int> > &co_matrix)
56 for(int i=0;i<co_matrix.size();i++)
58 if (m->control_pressed) { break; }
59 random_shuffle( co_matrix[i].begin(), co_matrix[i].end() );
63 m->errorOut(e, "TrialSwap2", "sim2");
67 /**************************************************************************************************/
68 int TrialSwap2::sim2plus(vector<int> rowtotal, vector<vector<int> > &co_matrix)
71 int nrows = co_matrix.size();
72 int ncols = co_matrix[0].size();
73 double cellprob = 1.0/ncols;
74 vector<double> cellprobvec;
76 vector<vector<int> > tmpmatrix;
81 for(int i=0; i<ncols; i++)
83 if (m->control_pressed) { return 0; }
84 cellprobvec.push_back(start + cellprob);
85 start = cellprobvec[i];
88 for(int i=0; i<nrows; i++)
90 tmprow.assign(ncols, 0);
92 while( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i])
94 if (m->control_pressed) { return 0; }
95 double randNum = rand() / double(RAND_MAX);
96 //cout << randNum << endl;
97 if(randNum <= cellprobvec[0])
102 for(int j=1;j<ncols;j++)
104 //cout << range[j] << endl;
105 if(randNum <= cellprobvec[j] && randNum > cellprobvec[j-1] && tmprow[j] != 1)
111 tmpmatrix.push_back(tmprow);
114 co_matrix = tmpmatrix;
120 catch(exception& e) {
121 m->errorOut(e, "TrialSwap2", "sim2plus");
125 /**************************************************************************************************/
127 * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition
128 * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose
129 * every null matrix before using an index and use the random matrix as a seed for the next null.
131 /**************************************************************************************************/
132 void TrialSwap2::sim3(vector<vector<int> > &initmatrix)
135 for(int i=0;i<initmatrix.size();i++)
137 if (m->control_pressed) { break; }
138 random_shuffle( initmatrix[i].begin(), initmatrix[i].end() );
142 catch(exception& e) {
143 m->errorOut(e, "TrialSwap2", "sim3");
147 /**************************************************************************************************/
153 /**************************************************************************************************/
154 int TrialSwap2::sim4(vector<int> columntotal, vector<int> rowtotal, vector<vector<int> > &co_matrix)
157 vector<double> colProb;
158 vector<int> tmprow;//(ncols, 7);
159 vector<vector<int> > tmpmatrix;
160 vector<double> range;
161 vector<double> randNums;
162 int ncols = columntotal.size();
163 int nrows = rowtotal.size();
166 double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
167 //cout << "col sum: " << colSum << endl;
168 for(int i=0;i<ncols;i++)
170 if (m->control_pressed) { return 0; }
171 colProb.push_back(columntotal[i]/colSum);
176 for(int i=0;i<ncols;i++)
178 if (m->control_pressed) { return 0; }
179 range.push_back(start + colProb[i]);
183 for(int i=0;i<nrows;i++)
185 tmprow.assign(ncols, 0);
186 if (m->control_pressed) { return 0; }
188 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i])
190 if (m->control_pressed) { return 0; }
192 double randNum = rand() / double(RAND_MAX);
193 if(randNum <= range[0])
198 for(int j=1;j<ncols;j++)
200 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
207 tmpmatrix.push_back(tmprow);
211 co_matrix = tmpmatrix;
215 catch(exception& e) {
216 m->errorOut(e, "TrialSwap2", "sim4");
220 /**************************************************************************************************/
222 * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
226 /**************************************************************************************************/
227 int TrialSwap2::sim5(vector<int> initcolumntotal,vector<int> initrowtotal, vector<vector<int> > &initmatrix)
230 vector<double> colProb;
231 vector<int> tmprow;//(ncols, 7);
232 vector<vector<int> > tmpmatrix;
233 vector<double> range;
234 vector<double> randNums;
235 int ncols = initcolumntotal.size();
236 int nrows = initrowtotal.size();
240 double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 );
241 //cout << "col sum: " << colSum << endl;
242 for(int i=0;i<ncols;i++)
244 if (m->control_pressed) { return 0; }
245 colProb.push_back(initcolumntotal[i]/colSum);
250 for(int i=0;i<ncols;i++)
252 if (m->control_pressed) { return 0; }
253 range.push_back(start + colProb[i]);
257 for(int i=0;i<nrows;i++)
259 tmprow.assign(ncols, 0);
260 if (m->control_pressed) { return 0; }
262 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < initrowtotal[i])
264 if (m->control_pressed) { return 0; }
266 double randNum = rand() / double(RAND_MAX);
267 if(randNum <= range[0])
272 for(int j=1;j<ncols;j++)
274 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
281 tmpmatrix.push_back(tmprow);
285 initmatrix = tmpmatrix;
288 catch(exception& e) {
289 m->errorOut(e, "TrialSwap2", "sim5");
293 /**************************************************************************************************/
299 /**************************************************************************************************/
300 int TrialSwap2::sim6(vector<int> columntotal, vector<vector<int> > &co_matrix)
303 vector<vector<int> > tmpmatrix;
304 vector<double> colProb;
306 vector<double> range;
307 int ncols = columntotal.size();
308 int nrows = co_matrix.size();
310 int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
312 for(int i=0;i<ncols;i++)
314 if (m->control_pressed) { return 0; }
315 colProb.push_back(columntotal[i]/double (colSum));
320 for(int i=0;i<ncols;i++)
322 if (m->control_pressed) { return 0; }
323 range.push_back(start + colProb[i]);
327 for(int i=0;i<nrows;i++)
329 if (m->control_pressed) { return 0; }
330 tmprow.assign(ncols, 0);
332 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
333 while ( tmprowtotal > ncols) {
334 if (m->control_pressed) { return 0; }
335 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
337 //cout << tmprowtotal << endl;
338 //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl;
340 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal)
342 if (m->control_pressed) { return 0; }
343 double randNum = rand() / double(RAND_MAX);
344 //cout << randNum << endl;
345 if(randNum <= range[0])
350 for(int j=1;j<ncols;j++)
352 //cout << range[j] << endl;
353 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
363 tmpmatrix.push_back(tmprow);
367 co_matrix = tmpmatrix;
372 catch(exception& e) {
373 m->errorOut(e, "TrialSwap2", "sim6");
377 /**************************************************************************************************/
379 * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
383 /**************************************************************************************************/
384 int TrialSwap2::sim7(vector<int> initrowtotal, vector<vector<int> > &co_matrix)
387 vector<vector<double> > probmatrix;
388 vector<vector<int> > tmpmatrix;
389 vector<double> colProb;
390 vector<double> probrow;
392 vector<double> range;
394 int ncols = co_matrix[0].size(); int nrows = co_matrix.size();
396 tmpmatrix.assign(nrows, vector<int>(ncols, 0.));
398 int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 );
401 //cout << nc << endl;
403 //assign null matrix based on probabilities
405 double start = 0.0; // don't reset start -- probs should be from 0-1 thoughout the entire matrix
407 for(int i=0;i<nrows;i++)
409 if (m->control_pressed) { return 0; }
410 //cout << initrowtotal[i]/double(nc) << endl;
411 double cellprob = initrowtotal[i]/double(nc);
412 //cout << cellprob << endl;
413 for(int j=0;j<ncols;j++)
416 probrow.push_back(start + cellprob);
417 //cout << probrow[j] << endl;
418 //cout << start << endl;
419 start = start + cellprob;
421 probmatrix.push_back(probrow);
426 //while(tmprowsum < rowsum)
427 //for(int k=0;k<rowsum;k++)
431 if (m->control_pressed) { return 0; }
434 //tmprowsum = accumulate( tmprowtotal.begin(), tmprowtotal.end(), 0 );
435 double randNum = rand() / double(RAND_MAX);
436 //cout << randNum << "+" << endl;
437 //special case for the first entry
438 if(randNum <= probmatrix[0][0] && tmpmatrix[0][0] != 1)
447 for(int i=0;i<nrows;i++)
449 if (m->control_pressed) { return 0; }
450 for(int j=0;j<ncols;j++)
452 //cout << probmatrix[i][j] << endl;
453 if(randNum <= probmatrix[i][j] && randNum > probmatrix[i][j-1] && tmpmatrix[i][j] != 1)
468 co_matrix = tmpmatrix;
470 //build probibility matrix
471 /* for(int i=0;i<nrows;i++)
473 for(int j=0;j<ncols;j++)
475 probrow.push_back(rowtotal[i]/nc);
477 probmatrix.pushback(probrow);
482 /* int colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 );
484 for(int i=0;i<ncols;i++)
486 colProb.push_back(initcolumntotal[i]/double (colSum));
491 for(int i=0;i<ncols;i++)
493 range.push_back(start + colProb[i]);
497 for(int i=0;i<nrows;i++)
499 tmprow.assign(ncols, 0);
501 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
502 while ( tmprowtotal > ncols)
503 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
504 //cout << tmprowtotal << endl;
505 //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl;
507 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal)
509 double randNum = rand() / double(RAND_MAX);
510 //cout << randNum << endl;
511 if(randNum <= range[0])
516 for(int j=1;j<ncols;j++)
518 //cout << range[j] << endl;
519 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
526 tmpmatrix.push_back(tmprow);
530 initmatrix = tmpmatrix;
533 catch(exception& e) {
534 m->errorOut(e, "TrialSwap2", "sim7");
538 /**************************************************************************************************/
544 /**************************************************************************************************/
545 int TrialSwap2::sim8(vector<int> columntotal, vector<int> rowtotal, vector<vector<int> > &co_matrix)
550 int ncols = columntotal.size(); int nrows = rowtotal.size();
551 double probarray[nrows * ncols];
556 //double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
557 double rowSum = accumulate( rowtotal.begin(), rowtotal.end(), 0 );
559 if (m->control_pressed) { return 0; }
561 //cout << "rowsum: " << rowSum << endl;
565 //create probability matrix with each site being between 0 and 1
566 for (int i=0;i<nrows;i++) {
567 if (m->control_pressed) { return 0; }
568 for (int j=0;j<ncols;j++) {
569 prob = (rowtotal[i] * columntotal[j])/(rowSum*rowSum);
571 probarray[ncols * i + j] = -1;
573 probarray[ncols * i + j] = start + prob;
574 //probmatrixrow.pushback(start + prob);
578 //cout << "prbarray" << endl;
579 //for(int i=0;i<(nrows*ncols);i++)
580 //cout << probarray[i] << " ";
583 //generate random muber between 0 and 1 and interate through probarray until found
584 while (total < grandtotal) {
585 if (m->control_pressed) { return 0; }
586 randnum = rand() / double(RAND_MAX);
587 //cout << "rand num: " << randnum << endl;
588 if((randnum <= probarray[0]) && (probarray[0] != 2) ) {
593 for(int i=1;i<(nrows*ncols);i++) {
594 if (m->control_pressed) { return 0; }
595 if((randnum <= probarray[i]) && (randnum > probarray[i-1]) && (probarray[i] != 2) ) {
604 //cout << "prbarray" << endl;
605 //for(int i=0;i<(nrows*ncols);i++)
606 //cout << probarray[i] << " ";
608 for(int i=0;i<nrows;i++) {
609 if (m->control_pressed) { return 0; }
610 for(int j=0;j<ncols;j++) {
611 if(probarray[ncols * i + j] == 2)
619 catch(exception& e) {
620 m->errorOut(e, "TrialSwap2", "sim8");
624 /**************************************************************************************************/
625 double TrialSwap2::calc_c_score (vector<vector<int> > &co_matrix,vector<int> rowtotal)
631 double normcscore = 0.0;
633 int ncols = co_matrix[0].size(); int nrows = rowtotal.size();
634 vector<vector<double> > s; s.resize(nrows);
635 for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0.0); }//only fill half the matrix
638 for(int i=0;i<nrows-1;i++)
641 for(int j=i+1;j<nrows;j++)
643 if (m->control_pressed) { return 0; }
644 for(int k=0;k<ncols;k++)
646 if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) //if both are 1s ie co-occurrence
647 s[i][j]++; //s counts co-occurrences
650 //rowtotal[i] = A, rowtotal[j] = B, ncols = P, s[i][j] = J
651 cscore += (rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);///(nrows*(nrows-1)/2);
652 D = (rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);
654 if(ncols < (rowtotal[i] + rowtotal[j]))
656 maxD = (ncols-rowtotal[i])*(ncols-rowtotal[j]);
660 maxD = rowtotal[i] * rowtotal[j];
665 normcscore += D/maxD;
671 cscore = cscore/(double)(nrows*(nrows-1)/2);
672 //cout << "normalized c score: " << normcscore/nonzeros << endl;
676 catch(exception& e) {
677 m->errorOut(e, "TrialSwap2", "calc_c_score");
681 /**************************************************************************************************/
682 int TrialSwap2::calc_checker (vector<vector<int> > &co_matrix, vector<int> rowtotal)
686 //int s[nrows][ncols];
687 int ncols = co_matrix[0].size(); int nrows = rowtotal.size();
688 vector<vector<int> > s; s.resize(nrows);
689 for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0); }//only fill half the matrix
691 for(int i=0;i<nrows-1;i++)
693 for(int j=i+1;j<nrows;j++)
695 if (m->control_pressed) { return 0; }
697 for(int k=0;k<ncols;k++)
699 //cout << s[i][j] << endl;
700 //iterates through the row and counts co-occurrences. The total number of co-occurrences for each row pair is kept in matrix s at location s[i][j].
701 if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) //if both are 1s ie co-occurrence
702 s[i][j]++; //s counts co-occurrences
705 //cout << "rowtotal: " << rowtotal[i] << endl;
706 //cout << "co-occurrences: " << s[i][j] << endl;
707 //cunits+=(rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);
718 catch(exception& e) {
719 m->errorOut(e, "TrialSwap2", "calc_checker");
723 /**************************************************************************************************/
724 double TrialSwap2::calc_vratio (vector<int> rowtotal, vector<int> columntotal)
727 int nrows = rowtotal.size();
728 int ncols = columntotal.size();
729 int sumCol = accumulate(columntotal.begin(), columntotal.end(), 0 );
730 // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 );
732 double colAvg = (double) sumCol / (double) ncols;
733 // double rowAvg = (double) sumRow / (double) nrows;
737 // double totalRowVar = 0.0;
741 for(int i=0;i<nrows;i++)
743 if (m->control_pressed) { return 0; }
744 p = (double) rowtotal[i]/(double) ncols;
745 rowVar += p * (1.0-p);
748 for(int i=0;i<ncols;i++)
750 if (m->control_pressed) { return 0; }
751 colVar += pow(((double) columntotal[i]-colAvg),2);
754 colVar = (1.0/(double)ncols) * colVar;
756 return colVar/rowVar;
758 catch(exception& e) {
759 m->errorOut(e, "TrialSwap2", "calc_vratio");
764 /**************************************************************************************************/
765 int TrialSwap2::calc_combo (vector<vector<int> > &initmatrix)
768 int initrows = initmatrix.size();
772 for(int i=0;i<initrows;i++)
775 for(int j=i+1;j<=initrows;j++)
777 if (m->control_pressed) { return 0; }
778 if( (initmatrix[i] == initmatrix[j]))
786 //on the last iteration of a previously matched row it will add itself because it doesn't match any following rows, so that combination is counted
792 catch(exception& e) {
793 m->errorOut(e, "TrialSwap2", "calc_combo");
797 /**************************************************************************************************/
798 int TrialSwap2::swap_checkerboards (vector<vector<int> > &co_matrix)
801 int ncols = co_matrix[0].size(); int nrows = co_matrix.size();
803 i = m->getRandomIndex(nrows-1);
804 while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }}
805 k = m->getRandomIndex(ncols-1);
806 while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }}
808 //cout << co_matrix[i][k] << " " << co_matrix[j][l] << endl;
809 //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl;
810 //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl;
811 //cout << co_matrix[i][l] << " " << co_matrix[j][k] << endl;
812 if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap
814 co_matrix[i][k]=1-co_matrix[i][k];
815 co_matrix[i][l]=1-co_matrix[i][l];
816 co_matrix[j][k]=1-co_matrix[j][k];
817 co_matrix[j][l]=1-co_matrix[j][l];
818 //cout << "swapped!" << endl;
820 //cout << "i: " << i << " j: " << j << " k: " << " l: " << l << endl;
823 catch(exception& e) {
824 m->errorOut(e, "TrialSwap2", "swap_checkerboards");
828 /**************************************************************************************************/
829 double TrialSwap2::calc_pvalue_greaterthan (vector<double> scorevec, double initialscore)
832 int runs = scorevec.size();
834 for( int i=0;i<runs;i++)
836 if (m->control_pressed) { return 0; }
837 if(scorevec[i]>=initialscore)
840 return p/(double)runs;
842 catch(exception& e) {
843 m->errorOut(e, "TrialSwap2", "calc_pvalue_greaterthan");
847 /**************************************************************************************************/
848 double TrialSwap2::calc_pvalue_lessthan (vector<double> scorevec, double initialscore)
851 int runs = scorevec.size();
853 for( int i=0;i<runs;i++)
855 if (m->control_pressed) { return 0; }
856 if(scorevec[i]<=initialscore)
859 return p/(double)runs;
861 catch(exception& e) {
862 m->errorOut(e, "TrialSwap2", "calc_pvalue_lessthan");
866 /**************************************************************************************************/
867 double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vector<double> scorevec)
874 for(int i=0;i<runs;i++)
876 if (m->control_pressed) { return 0; }
877 sum += pow((scorevec[i] - nullMean),2);
878 //cout << "scorevec[" << i << "]" << scorevec[i] << endl;
881 m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine();
883 m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine();
885 sampleSD = sqrt( (1/runs) * sum );
887 m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine();
889 t = (nullMean - initialscore) / (sampleSD / sqrt(runs));
893 catch(exception& e) {
894 m->errorOut(e, "TrialSwap2", "t_test");
898 /**************************************************************************************************/
899 int TrialSwap2::print_matrix(vector<vector<int> > &matrix, int nrows, int ncols)
902 m->mothurOut("matrix:"); m->mothurOutEndLine();
904 for (int i = 0; i < nrows; i++)
906 if (m->control_pressed) { return 0; }
907 for (int j = 0; j < ncols; j++)
909 m->mothurOut(toString(matrix[i][j]));
911 m->mothurOutEndLine();
915 catch(exception& e) {
916 m->errorOut(e, "TrialSwap2", "print_matrix");
920 /**************************************************************************************************/
921 int TrialSwap2::transpose_matrix (vector<vector<int> > &initmatrix, vector<vector<int> > &co_matrix)//, int nrows, int nocols)
924 int ncols = initmatrix.size(); int nrows = initmatrix[0].size();
925 int tmpnrows = nrows;
926 //vector<vector<int> > tmpvec;
928 if(!co_matrix.empty())
930 for (int i=0;i<nrows;i++)
932 if (m->control_pressed) { return 0; }
933 for (int j=0;j<ncols;j++)
935 tmprow.push_back(initmatrix[j][i]);
937 /*if (accumulate( tmprow.begin(), tmprow.end(), 0 ) == 0)
942 co_matrix.push_back(tmprow);
948 catch(exception& e) {
949 m->errorOut(e, "TrialSwap2", "transpose_matrix");
953 /**************************************************************************************************/
954 int TrialSwap2::update_row_col_totals(vector<vector<int> > &co_matrix, vector<int> &rowtotal, vector<int> &columntotal)
958 //columntotal.clear();
959 //generate (rowtotal.begin(), rowtotal.end(), 0);
960 //generate (columntotal.begin(), columntotal.end(), 0);
961 int nrows = co_matrix.size();
962 int ncols = co_matrix[0].size();
963 vector<int> tmpcolumntotal; tmpcolumntotal.resize(ncols, 0);
964 vector<int> tmprowtotal; tmprowtotal.resize(nrows, 0);
968 for (int i = 0; i < nrows; i++)
970 if (m->control_pressed) { return 0; }
971 for (int j = 0; j < ncols; j++)
973 if (co_matrix[i][j] == 1)
979 tmprowtotal[i] = rowcount;
982 columntotal = tmpcolumntotal;
983 rowtotal = tmprowtotal;
984 /*cout << "rowtotal: ";
985 for(int i = 0; i<nrows; i++) { cout << rowtotal[i]; }
987 cout << " coltotal: ";
988 for(int i = 0; i<ncols; i++) { cout << columntotal[i]; }
992 catch(exception& e) {
993 m->errorOut(e, "TrialSwap2", "update_row_col_totals");
997 /**************************************************************************************************/