1 #include "trialswap2.h"
4 //The sum_of_squares, havel_hakimi and calc_c_score algorithms have been adapted from I. Miklos and J. Podani. 2004. Randomization of presence-absence matrices: comments and new algorithms. Ecology 85:86-92.
7 /**************************************************************************************************/
8 int TrialSwap2::sim1(vector<vector<int> > &co_matrix){
11 vector<vector<int> > tmpmatrix;
12 int nrows = co_matrix.size();
13 int ncols = co_matrix[0].size();
16 // for(i=0;i<nrows;i++)
21 //cout << "building matrix" << endl;
22 for(int i=0;i<nrows;i++){
23 if (m->control_pressed) { break; }
25 for(int j=0;j<ncols;j++){
26 double randNum = rand() / double(RAND_MAX);
27 //cout << randNum << endl;
35 tmpmatrix.push_back(randRow);
39 co_matrix = tmpmatrix;
44 m->errorOut(e, "TrialSwap2", "sim1");
48 /**************************************************************************************************/
50 *row sums fixed, columns equiprobable
52 void TrialSwap2::sim2(vector<vector<int> > &co_matrix)
56 for(int i=0;i<co_matrix.size();i++)
58 if (m->control_pressed) { break; }
59 random_shuffle( co_matrix[i].begin(), co_matrix[i].end() );
63 m->errorOut(e, "TrialSwap2", "sim2");
67 /**************************************************************************************************/
68 int TrialSwap2::sim2plus(vector<int> rowtotal, vector<vector<int> > &co_matrix)
71 int nrows = co_matrix.size();
72 int ncols = co_matrix[0].size();
73 double cellprob = 1.0/ncols;
74 vector<double> cellprobvec;
76 vector<vector<int> > tmpmatrix;
81 for(int i=0; i<ncols; i++)
83 if (m->control_pressed) { return 0; }
84 cellprobvec.push_back(start + cellprob);
85 start = cellprobvec[i];
88 for(int i=0; i<nrows; i++)
90 tmprow.assign(ncols, 0);
92 while( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i])
94 if (m->control_pressed) { return 0; }
95 double randNum = rand() / double(RAND_MAX);
96 //cout << randNum << endl;
97 if(randNum <= cellprobvec[0])
102 for(int j=1;j<ncols;j++)
104 //cout << range[j] << endl;
105 if(randNum <= cellprobvec[j] && randNum > cellprobvec[j-1] && tmprow[j] != 1)
111 tmpmatrix.push_back(tmprow);
114 co_matrix = tmpmatrix;
120 catch(exception& e) {
121 m->errorOut(e, "TrialSwap2", "sim2plus");
125 /**************************************************************************************************/
127 * same as sim2 but using initmatrix which is the initial co-occurrence matrix before transposition
128 * may have to be changed depending on what matrix 'seed' is used. One way to use is to transpose
129 * every null matrix before using an index and use the random matrix as a seed for the next null.
131 /**************************************************************************************************/
132 void TrialSwap2::sim3(vector<vector<int> > &initmatrix)
135 for(int i=0;i<initmatrix.size();i++)
137 if (m->control_pressed) { break; }
138 random_shuffle( initmatrix[i].begin(), initmatrix[i].end() );
142 catch(exception& e) {
143 m->errorOut(e, "TrialSwap2", "sim3");
147 /**************************************************************************************************/
153 /**************************************************************************************************/
154 int TrialSwap2::sim4(vector<int> columntotal, vector<int> rowtotal, vector<vector<int> > &co_matrix)
157 vector<double> colProb;
158 vector<int> tmprow;//(ncols, 7);
159 vector<vector<int> > tmpmatrix;
160 vector<double> range;
161 vector<double> randNums;
162 int ncols = columntotal.size();
163 int nrows = rowtotal.size();
166 double colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
167 //cout << "col sum: " << colSum << endl;
168 for(int i=0;i<ncols;i++)
170 if (m->control_pressed) { return 0; }
171 colProb.push_back(columntotal[i]/colSum);
176 for(int i=0;i<ncols;i++)
178 if (m->control_pressed) { return 0; }
179 range.push_back(start + colProb[i]);
183 for(int i=0;i<nrows;i++)
185 tmprow.assign(ncols, 0);
186 if (m->control_pressed) { return 0; }
188 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < rowtotal[i])
190 if (m->control_pressed) { return 0; }
192 double randNum = rand() / double(RAND_MAX);
193 if(randNum <= range[0])
198 for(int j=1;j<ncols;j++)
200 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
207 tmpmatrix.push_back(tmprow);
211 co_matrix = tmpmatrix;
215 catch(exception& e) {
216 m->errorOut(e, "TrialSwap2", "sim4");
220 /**************************************************************************************************/
222 * inverse of sim4, MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
226 /**************************************************************************************************/
227 int TrialSwap2::sim5(vector<int> initcolumntotal,vector<int> initrowtotal, vector<vector<int> > &initmatrix)
230 vector<double> colProb;
231 vector<int> tmprow;//(ncols, 7);
232 vector<vector<int> > tmpmatrix;
233 vector<double> range;
234 vector<double> randNums;
235 int ncols = initcolumntotal.size();
236 int nrows = initrowtotal.size();
240 double colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 );
241 //cout << "col sum: " << colSum << endl;
242 for(int i=0;i<ncols;i++)
244 if (m->control_pressed) { return 0; }
245 colProb.push_back(initcolumntotal[i]/colSum);
250 for(int i=0;i<ncols;i++)
252 if (m->control_pressed) { return 0; }
253 range.push_back(start + colProb[i]);
257 for(int i=0;i<nrows;i++)
259 tmprow.assign(ncols, 0);
260 if (m->control_pressed) { return 0; }
262 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < initrowtotal[i])
264 if (m->control_pressed) { return 0; }
266 double randNum = rand() / double(RAND_MAX);
267 if(randNum <= range[0])
272 for(int j=1;j<ncols;j++)
274 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
281 tmpmatrix.push_back(tmprow);
285 initmatrix = tmpmatrix;
288 catch(exception& e) {
289 m->errorOut(e, "TrialSwap2", "sim5");
293 /**************************************************************************************************/
299 /**************************************************************************************************/
300 int TrialSwap2::sim6(vector<int> columntotal, vector<vector<int> > &co_matrix)
303 vector<vector<int> > tmpmatrix;
304 vector<double> colProb;
306 vector<double> range;
307 int ncols = columntotal.size();
308 int nrows = co_matrix.size();
310 int colSum = accumulate( columntotal.begin(), columntotal.end(), 0 );
312 for(int i=0;i<ncols;i++)
314 if (m->control_pressed) { return 0; }
315 colProb.push_back(columntotal[i]/double (colSum));
320 for(int i=0;i<ncols;i++)
322 if (m->control_pressed) { return 0; }
323 range.push_back(start + colProb[i]);
327 for(int i=0;i<nrows;i++)
329 if (m->control_pressed) { return 0; }
330 tmprow.assign(ncols, 0);
332 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
333 while ( tmprowtotal > ncols) {
334 if (m->control_pressed) { return 0; }
335 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
337 //cout << tmprowtotal << endl;
338 //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl;
340 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal)
342 if (m->control_pressed) { return 0; }
343 double randNum = rand() / double(RAND_MAX);
344 //cout << randNum << endl;
345 if(randNum <= range[0])
350 for(int j=1;j<ncols;j++)
352 //cout << range[j] << endl;
353 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
363 tmpmatrix.push_back(tmprow);
367 co_matrix = tmpmatrix;
372 catch(exception& e) {
373 m->errorOut(e, "TrialSwap2", "sim6");
377 /**************************************************************************************************/
379 * MUST BE TRANSPOSED BEFORE CO-OCCURRENCE ANALYSIS
383 /**************************************************************************************************/
384 int TrialSwap2::sim7(vector<int> initrowtotal, vector<vector<int> > &co_matrix)
387 vector<vector<double> > probmatrix;
388 vector<vector<int> > tmpmatrix;
389 vector<double> colProb;
390 vector<double> probrow;
392 vector<double> range;
394 int ncols = co_matrix[0].size(); int nrows = co_matrix.size();
396 tmpmatrix.assign(nrows, vector<int>(ncols, 0.));
398 int rowsum = accumulate( initrowtotal.begin(), initrowtotal.end(), 0 );
401 //cout << nc << endl;
403 //assign null matrix based on probabilities
405 double start = 0.0; // don't reset start -- probs should be from 0-1 thoughout the entire matrix
407 for(int i=0;i<nrows;i++)
409 if (m->control_pressed) { return 0; }
410 //cout << initrowtotal[i]/double(nc) << endl;
411 double cellprob = initrowtotal[i]/double(nc);
412 //cout << cellprob << endl;
413 for(int j=0;j<ncols;j++)
416 probrow.push_back(start + cellprob);
417 //cout << probrow[j] << endl;
418 //cout << start << endl;
419 start = start + cellprob;
421 probmatrix.push_back(probrow);
426 //while(tmprowsum < rowsum)
427 //for(int k=0;k<rowsum;k++)
431 if (m->control_pressed) { return 0; }
434 //tmprowsum = accumulate( tmprowtotal.begin(), tmprowtotal.end(), 0 );
435 double randNum = rand() / double(RAND_MAX);
436 //cout << randNum << "+" << endl;
437 //special case for the first entry
438 if(randNum <= probmatrix[0][0] && tmpmatrix[0][0] != 1)
447 for(int i=0;i<nrows;i++)
449 if (m->control_pressed) { return 0; }
450 for(int j=0;j<ncols;j++)
452 //cout << probmatrix[i][j] << endl;
453 if(randNum <= probmatrix[i][j] && randNum > probmatrix[i][j-1] && tmpmatrix[i][j] != 1)
468 co_matrix = tmpmatrix;
470 //build probibility matrix
471 /* for(int i=0;i<nrows;i++)
473 for(int j=0;j<ncols;j++)
475 probrow.push_back(rowtotal[i]/nc);
477 probmatrix.pushback(probrow);
482 /* int colSum = accumulate( initcolumntotal.begin(), initcolumntotal.end(), 0 );
484 for(int i=0;i<ncols;i++)
486 colProb.push_back(initcolumntotal[i]/double (colSum));
491 for(int i=0;i<ncols;i++)
493 range.push_back(start + colProb[i]);
497 for(int i=0;i<nrows;i++)
499 tmprow.assign(ncols, 0);
501 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
502 while ( tmprowtotal > ncols)
503 tmprowtotal = (rand() / double (RAND_MAX)) * 10;
504 //cout << tmprowtotal << endl;
505 //cout << accumulate( tmprow.begin(), tmprow.end(), 0 ) << endl;
507 while ( accumulate( tmprow.begin(), tmprow.end(), 0 ) < tmprowtotal)
509 double randNum = rand() / double(RAND_MAX);
510 //cout << randNum << endl;
511 if(randNum <= range[0])
516 for(int j=1;j<ncols;j++)
518 //cout << range[j] << endl;
519 if(randNum <= range[j] && randNum > range[j-1] && tmprow[j] != 1)
526 tmpmatrix.push_back(tmprow);
530 /**************************************************************************************************/
531 double TrialSwap2::calc_c_score (vector<vector<int> > &co_matrix, vector<int> rowtotal, int ncols, int nrows)
537 double normcscore = 0.0;
539 //int ncols = co_matrix[0].size(); int nrows = rowtotal.size();
540 vector<vector<double> > s; s.resize(nrows);
541 for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0.0); }//only fill half the matrix
544 for(int i=0;i<nrows-1;i++)
547 for(int j=i+1;j<nrows;j++)
549 if (m->control_pressed) { return 0; }
550 for(int k=0;k<ncols;k++)
552 if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) //if both are 1s ie co-occurrence
553 s[i][j]++; //s counts co-occurrences
556 //rowtotal[i] = A, rowtotal[j] = B, ncols = P, s[i][j] = J
557 cscore += (rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);///(nrows*(nrows-1)/2);
558 D = (rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);
560 if(ncols < (rowtotal[i] + rowtotal[j]))
562 maxD = (ncols-rowtotal[i])*(ncols-rowtotal[j]);
566 maxD = rowtotal[i] * rowtotal[j];
571 normcscore += D/maxD;
577 cscore = cscore/(double)(nrows*(nrows-1)/2);
578 //cout << "normalized c score: " << normcscore/nonzeros << endl;
582 catch(exception& e) {
583 m->errorOut(e, "TrialSwap2", "calc_c_score");
587 /**************************************************************************************************/
588 int TrialSwap2::calc_checker (vector<vector<int> > &co_matrix, vector<int> rowtotal, int ncols, int nrows)
592 //int s[nrows][ncols];
593 //int ncols = co_matrix[0].size(); int nrows = rowtotal.size();
594 vector<vector<int> > s; s.resize(nrows);
595 for (int i = 0; i < nrows; i++) { s[i].resize(nrows,0); }//only fill half the matrix
597 for(int i=0;i<nrows-1;i++)
599 for(int j=i+1;j<nrows;j++)
601 if (m->control_pressed) { return 0; }
603 for(int k=0;k<ncols;k++)
605 //cout << s[i][j] << endl;
606 //iterates through the row and counts co-occurrences. The total number of co-occurrences for each row pair is kept in matrix s at location s[i][j].
607 if((co_matrix[i][k]==1)&&(co_matrix[j][k]==1)) //if both are 1s ie co-occurrence
608 s[i][j]++; //s counts co-occurrences
611 //cout << "rowtotal: " << rowtotal[i] << endl;
612 //cout << "co-occurrences: " << s[i][j] << endl;
613 //cunits+=(rowtotal[i]-s[i][j])*(rowtotal[j]-s[i][j]);
624 catch(exception& e) {
625 m->errorOut(e, "TrialSwap2", "calc_checker");
629 /**************************************************************************************************/
630 double TrialSwap2::calc_vratio (int nrows, int ncols, vector<int> rowtotal, vector<int> columntotal)
633 //int nrows = rowtotal.size();
634 //int ncols = columntotal.size();
635 int sumCol = accumulate(columntotal.begin(), columntotal.end(), 0 );
636 // int sumRow = accumulate(rowtotal.begin(), rowtotal.end(), 0 );
638 double colAvg = (double) sumCol / (double) ncols;
639 // double rowAvg = (double) sumRow / (double) nrows;
643 // double totalRowVar = 0.0;
647 for(int i=0;i<nrows;i++)
649 if (m->control_pressed) { return 0; }
650 p = (double) rowtotal[i]/(double) ncols;
651 rowVar += p * (1.0-p);
654 for(int i=0;i<ncols;i++)
656 if (m->control_pressed) { return 0; }
657 colVar += pow(((double) columntotal[i]-colAvg),2);
660 colVar = (1.0/(double)ncols) * colVar;
662 return colVar/rowVar;
664 catch(exception& e) {
665 m->errorOut(e, "TrialSwap2", "calc_vratio");
670 /**************************************************************************************************/
671 int TrialSwap2::calc_combo (int nrows, int ncols, vector<vector<int> > &nullmatrix)
674 //need to transpose so we can compare rows (row-major order)
675 int tmpnrows = nrows;
676 vector<vector<int> > tmpmatrix;
679 if(!tmpmatrix.empty())
681 for (int i=0;i<ncols;i++)
683 for (int j=0;j<nrows;j++)
685 tmprow.push_back(nullmatrix[j][i]);
688 tmpmatrix.push_back(tmprow);
694 for(int j=0;j<ncols;j++)
697 for(int i=j+1;i<=ncols;i++)
699 //comparing matrix rows
700 if( (tmpmatrix[j] == tmpmatrix[i]))
707 //on the last iteration of a previously matched row it will add itself because it doesn't match any following rows, so that combination is counted
713 catch(exception& e) {
714 m->errorOut(e, "TrialSwap2", "calc_combo");
718 /**************************************************************************************************/
719 int TrialSwap2::swap_checkerboards (vector<vector<int> > &co_matrix)
722 int ncols = co_matrix[0].size(); int nrows = co_matrix.size();
724 i = m->getRandomIndex(nrows-1);
725 while((j = m->getRandomIndex(nrows-1) ) == i ) {;if (m->control_pressed) { return 0; }}
726 k = m->getRandomIndex(ncols-1);
727 while((l = m->getRandomIndex(ncols-1)) == k ) {;if (m->control_pressed) { return 0; }}
729 if((co_matrix[i][k]*co_matrix[j][l]==1 && co_matrix[i][l]+co_matrix[j][k]==0)||(co_matrix[i][k]+co_matrix[j][l]==0 && co_matrix[i][l]*co_matrix[j][k]==1)) //checking for checkerboard value and swap
731 co_matrix[i][k]=1-co_matrix[i][k];
732 co_matrix[i][l]=1-co_matrix[i][l];
733 co_matrix[j][k]=1-co_matrix[j][k];
734 co_matrix[j][l]=1-co_matrix[j][l];
740 catch(exception& e) {
741 m->errorOut(e, "TrialSwap2", "swap_checkerboards");
745 /**************************************************************************************************/
746 double TrialSwap2::calc_pvalue_greaterthan (vector<double> scorevec, double initialscore)
749 int runs = scorevec.size();
751 for( int i=0;i<runs;i++)
753 if (m->control_pressed) { return 0; }
754 if(scorevec[i]>=initialscore)
757 return p/(double)runs;
759 catch(exception& e) {
760 m->errorOut(e, "TrialSwap2", "calc_pvalue_greaterthan");
764 /**************************************************************************************************/
765 double TrialSwap2::calc_pvalue_lessthan (vector<double> scorevec, double initialscore)
768 int runs = scorevec.size();
770 for( int i=0;i<runs;i++)
772 if (m->control_pressed) { return 0; }
773 if(scorevec[i]<=initialscore)
776 return p/(double)runs;
778 catch(exception& e) {
779 m->errorOut(e, "TrialSwap2", "calc_pvalue_lessthan");
783 /**************************************************************************************************/
784 double TrialSwap2::t_test (double initialscore, int runs, double nullMean, vector<double> scorevec)
791 for(int i=0;i<runs;i++)
793 if (m->control_pressed) { return 0; }
794 sum += pow((scorevec[i] - nullMean),2);
795 //cout << "scorevec[" << i << "]" << scorevec[i] << endl;
798 m->mothurOut("nullMean: " + toString(nullMean)); m->mothurOutEndLine();
800 m->mothurOut("sum: " + toString(sum)); m->mothurOutEndLine();
802 sampleSD = sqrt( (1/runs) * sum );
804 m->mothurOut("samplSD: " + toString(sampleSD)); m->mothurOutEndLine();
806 t = (nullMean - initialscore) / (sampleSD / sqrt(runs));
810 catch(exception& e) {
811 m->errorOut(e, "TrialSwap2", "t_test");
815 /**************************************************************************************************/
816 int TrialSwap2::print_matrix(vector<vector<int> > &matrix, int nrows, int ncols)
819 m->mothurOut("matrix:"); m->mothurOutEndLine();
821 for (int i = 0; i < nrows; i++)
823 if (m->control_pressed) { return 0; }
824 for (int j = 0; j < ncols; j++)
826 m->mothurOut(toString(matrix[i][j]));
828 m->mothurOutEndLine();
832 catch(exception& e) {
833 m->errorOut(e, "TrialSwap2", "print_matrix");
837 /**************************************************************************************************/