5 // Created by Patrick Schloss on 11/8/12.
6 // Copyright (c) 2012 University of Michigan. All rights reserved.
9 #include "qFinderDMM.h"
10 #include "linearalgebra.h"
12 #define EPSILON numeric_limits<double>::epsilon()
14 /**************************************************************************************************/
16 qFinderDMM::qFinderDMM(vector<vector<int> > cm, int p): countMatrix(cm), numPartitions(p){
18 m = MothurOut::getInstance();
19 numSamples = (int)countMatrix.size();
20 numOTUs = (int)countMatrix[0].size();
24 // cout << "done kMeans" << endl;
29 // cout << "done optimizeLambda" << endl;
31 double change = 1.0000;
36 while(change > 1.0e-6 && iter < 100){
38 // cout << "Calc_Z: ";
43 // printf("Iter:%d\t",iter);
45 for(int i=0;i<numPartitions;i++){
47 for(int j=0;j<numSamples;j++){
48 weights[i] += zMatrix[i][j];
50 // printf("w_%d=%.3f\t",i,weights[i]);
54 double nLL = getNegativeLogLikelihood();
56 change = abs(nLL - currNLL);
60 // printf("NLL=%.5f\tDelta=%.4e\n",currNLL, change);
65 error.resize(numPartitions);
67 logDeterminant = 0.0000;
71 for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
73 error[currentPartition].assign(numOTUs, 0.0000);
75 if(currentPartition > 0){
76 logDeterminant += (2.0 * log(numSamples) - log(weights[currentPartition]));
78 vector<vector<double> > hessian = getHessian();
79 vector<vector<double> > invHessian = l.getInverse(hessian);
81 for(int i=0;i<numOTUs;i++){
82 logDeterminant += log(abs(hessian[i][i]));
83 error[currentPartition][i] = invHessian[i][i];
88 int numParameters = numPartitions * numOTUs + numPartitions - 1;
89 laplace = currNLL + 0.5 * logDeterminant - 0.5 * numParameters * log(2.0 * 3.14159);
90 bic = currNLL + 0.5 * log(numSamples) * numParameters;
91 aic = currNLL + numParameters;
94 m->errorOut(e, "qFinderDMM", "qFinderDMM");
99 /**************************************************************************************************/
101 void qFinderDMM::printZMatrix(string fileName, vector<string> sampleName){
103 ofstream printMatrix;
104 m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
105 printMatrix.setf(ios::fixed, ios::floatfield);
106 printMatrix.setf(ios::showpoint);
108 for(int i=0;i<numPartitions;i++){ printMatrix << "\tPartition_" << i+1; } printMatrix << endl;
110 for(int i=0;i<numSamples;i++){
111 printMatrix << sampleName[i];
112 for(int j=0;j<numPartitions;j++){
113 printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
119 catch(exception& e) {
120 m->errorOut(e, "qFinderDMM", "printZMatrix");
125 /**************************************************************************************************/
127 void qFinderDMM::printRelAbund(string fileName, vector<string> otuNames){
130 m->openOutputFile(fileName, printRA); //(fileName.c_str());
131 printRA.setf(ios::fixed, ios::floatfield);
132 printRA.setf(ios::showpoint);
134 vector<double> totals(numPartitions, 0.0000);
135 for(int i=0;i<numPartitions;i++){
136 for(int j=0;j<numOTUs;j++){
137 totals[i] += exp(lambdaMatrix[i][j]);
142 for(int i=0;i<numPartitions;i++){
143 printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
144 printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
148 for(int i=0;i<numOTUs;i++){
150 if (m->control_pressed) { break; }
152 printRA << otuNames[i];
153 for(int j=0;j<numPartitions;j++){
155 if(error[j][i] >= 0.0000){
156 double std = sqrt(error[j][i]);
157 printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
158 printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
159 printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
162 printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
163 printRA << '\t' << "NA";
164 printRA << '\t' << "NA";
172 catch(exception& e) {
173 m->errorOut(e, "qFinderDMM", "printRelAbund");
178 /**************************************************************************************************/
180 // these functions for bfgs2 solver were lifted from the gnu_gsl source code...
182 /* Find a minimum in x=[0,1] of the interpolating quadratic through
183 * (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating
184 * polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2
188 interp_quad (double f0, double fp0, double f1, double zl, double zh)
190 double fl = f0 + zl*(fp0 + zl*(f1 - f0 -fp0));
191 double fh = f0 + zh*(fp0 + zh*(f1 - f0 -fp0));
192 double c = 2 * (f1 - f0 - fp0); /* curvature */
194 double zmin = zl, fmin = fl;
196 if (fh < fmin) { zmin = zh; fmin = fh; }
198 if (c > 0) /* positive curvature required for a minimum */
200 double z = -fp0 / c; /* location of minimum */
201 if (z > zl && z < zh) {
202 double f = f0 + z*(fp0 + z*(f1 - f0 -fp0));
203 if (f < fmin) { zmin = z; fmin = f; };
210 /**************************************************************************************************/
212 /* Find a minimum in x=[0,1] of the interpolating cubic through
213 * (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1.
215 * The interpolating polynomial is:
217 * c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
219 * where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
222 double cubic (double c0, double c1, double c2, double c3, double z){
223 return c0 + z * (c1 + z * (c2 + z * c3));
226 /**************************************************************************************************/
228 void check_extremum (double c0, double c1, double c2, double c3, double z,
229 double *zmin, double *fmin){
230 /* could make an early return by testing curvature >0 for minimum */
232 double y = cubic (c0, c1, c2, c3, z);
236 *zmin = z; /* accepted new point*/
241 /**************************************************************************************************/
243 int gsl_poly_solve_quadratic (double a, double b, double c,
244 double *x0, double *x1)
247 double disc = b * b - 4 * a * c;
249 if (a == 0) /* Handle linear case */
266 double r = fabs (0.5 * sqrt (disc) / a);
272 double sgnb = (b > 0 ? 1 : -1);
273 double temp = -0.5 * (b + sgnb * sqrt (disc));
274 double r1 = temp / a ;
275 double r2 = c / temp ;
303 /**************************************************************************************************/
305 double interp_cubic (double f0, double fp0, double f1, double fp1, double zl, double zh){
306 double eta = 3 * (f1 - f0) - 2 * fp0 - fp1;
307 double xi = fp0 + fp1 - 2 * (f1 - f0);
308 double c0 = f0, c1 = fp0, c2 = eta, c3 = xi;
312 zmin = zl; fmin = cubic(c0, c1, c2, c3, zl);
313 check_extremum (c0, c1, c2, c3, zh, &zmin, &fmin);
316 int n = gsl_poly_solve_quadratic (3 * c3, 2 * c2, c1, &z0, &z1);
318 if (n == 2) /* found 2 roots */
320 if (z0 > zl && z0 < zh)
321 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
322 if (z1 > zl && z1 < zh)
323 check_extremum (c0, c1, c2, c3, z1, &zmin, &fmin);
325 else if (n == 1) /* found 1 root */
327 if (z0 > zl && z0 < zh)
328 check_extremum (c0, c1, c2, c3, z0, &zmin, &fmin);
335 /**************************************************************************************************/
337 double interpolate (double a, double fa, double fpa,
338 double b, double fb, double fpb, double xmin, double xmax){
339 /* Map [a,b] to [0,1] */
340 double z, alpha, zmin, zmax;
342 zmin = (xmin - a) / (b - a);
343 zmax = (xmax - a) / (b - a);
353 z = interp_cubic (fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax);
356 z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
360 alpha = a + z * (b - a);
365 /**************************************************************************************************/
367 int qFinderDMM::lineMinimizeFletcher(vector<double>& x, vector<double>& p, double f0, double df0, double alpha1, double& alphaNew, double& fAlpha, vector<double>& xalpha, vector<double>& gradient ){
376 double alpha = alpha1;
377 double alpha_prev = 0.0000;
379 xalpha.resize(numOTUs, 0.0000);
381 double falpha_prev = f0;
382 double dfalpha_prev = df0;
384 double a = 0.0000; double b = alpha;
385 double fa = f0; double fb = 0.0000;
386 double dfa = df0; double dfb = 0.0/0.0;
390 while(iter++ < maxIters){
391 if (m->control_pressed) { break; }
393 for(int i=0;i<numOTUs;i++){
394 xalpha[i] = x[i] + alpha * p[i];
397 fAlpha = negativeLogEvidenceLambdaPi(xalpha);
399 if(fAlpha > f0 + alpha * rho * df0 || fAlpha >= falpha_prev){
400 a = alpha_prev; b = alpha;
401 fa = falpha_prev; fb = fAlpha;
402 dfa = dfalpha_prev; dfb = 0.0/0.0;
406 negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
407 double dfalpha = 0.0000;
408 for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
410 if(abs(dfalpha) <= -sigma * df0){
416 a = alpha; b = alpha_prev;
417 fa = fAlpha; fb = falpha_prev;
418 dfa = dfalpha; dfb = dfalpha_prev;
422 double delta = alpha - alpha_prev;
424 double lower = alpha + delta;
425 double upper = alpha + tau1 * delta;
427 double alphaNext = interpolate(alpha_prev, falpha_prev, dfalpha_prev, alpha, fAlpha, dfalpha, lower, upper);
430 falpha_prev = fAlpha;
431 dfalpha_prev = dfalpha;
436 while(iter++ < maxIters){
437 if (m->control_pressed) { break; }
439 double delta = b - a;
441 double lower = a + tau2 * delta;
442 double upper = b - tau3 * delta;
444 alpha = interpolate(a, fa, dfa, b, fb, dfb, lower, upper);
446 for(int i=0;i<numOTUs;i++){
447 xalpha[i] = x[i] + alpha * p[i];
450 fAlpha = negativeLogEvidenceLambdaPi(xalpha);
452 if((a - alpha) * dfa <= EPSILON){
456 if(fAlpha > f0 + rho * alpha * df0 || fAlpha >= fa){
462 double dfalpha = 0.0000;
464 negativeLogDerivEvidenceLambdaPi(xalpha, gradient);
466 for(int i=0;i<numOTUs;i++){ dfalpha += gradient[i] * p[i]; }
468 if(abs(dfalpha) <= -sigma * df0){
473 if(((b-a >= 0 && dfalpha >= 0) || ((b-a) <= 0.000 && dfalpha <= 0))){
474 b = a; fb = fa; dfb = dfa;
475 a = alpha; fa = fAlpha; dfa = dfalpha;
489 catch(exception& e) {
490 m->errorOut(e, "qFinderDMM", "lineMinimizeFletcher");
495 /**************************************************************************************************/
497 int qFinderDMM::bfgs2_Solver(vector<double>& x){
499 // cout << "bfgs2_Solver" << endl;
501 double step = 1.0e-6;
502 double delta_f = 0.0000;//f-f0;
504 vector<double> gradient;
505 double f = negativeLogEvidenceLambdaPi(x);
507 // cout << "after negLE" << endl;
509 negativeLogDerivEvidenceLambdaPi(x, gradient);
511 // cout << "after negLDE" << endl;
513 vector<double> x0 = x;
514 vector<double> g0 = gradient;
517 for(int i=0;i<numOTUs;i++){
518 g0norm += g0[i] * g0[i];
520 g0norm = sqrt(g0norm);
522 vector<double> p = gradient;
524 for(int i=0;i<numOTUs;i++){
526 pNorm += p[i] * p[i];
529 double df0 = -g0norm;
533 // cout << "before while" << endl;
535 while(g0norm > 0.001 && bfgsIter++ < maxIter){
536 if (m->control_pressed) { return 0; }
539 vector<double> dx(numOTUs, 0.0000);
541 double alphaOld, alphaNew;
543 if(pNorm == 0 || g0norm == 0 || df0 == 0){
544 dx.assign(numOTUs, 0.0000);
548 double delta = max(-delta_f, 10 * EPSILON * abs(f0));
549 alphaOld = min(1.0, 2.0 * delta / (-df0));
555 int success = lineMinimizeFletcher(x0, p, f0, df0, alphaOld, alphaNew, f, x, gradient);
564 vector<double> dx0(numOTUs);
565 vector<double> dg0(numOTUs);
567 for(int i=0;i<numOTUs;i++){
568 dx0[i] = x[i] - x0[i];
569 dg0[i] = gradient[i] - g0[i];
577 for(int i=0;i<numOTUs;i++){
578 dxg += dx0[i] * gradient[i];
579 dgg += dg0[i] * gradient[i];
580 dxdg += dx0[i] * dg0[i];
581 dgnorm += dg0[i] * dg0[i];
583 dgnorm = sqrt(dgnorm);
589 A = -(1.0 + dgnorm*dgnorm /dxdg) * B + dgg / dxdg;
596 for(int i=0;i<numOTUs;i++){ p[i] = gradient[i] - A * dx0[i] - B * dg0[i]; }
606 for(int i=0;i<numOTUs;i++){
607 pg += p[i] * gradient[i];
608 pNorm += p[i] * p[i];
609 g0norm += g0[i] * g0[i];
612 g0norm = sqrt(g0norm);
614 double dir = (pg >= 0.0) ? -1.0 : +1.0;
616 for(int i=0;i<numOTUs;i++){ p[i] *= dir / pNorm; }
620 for(int i=0;i<numOTUs;i++){
621 pNorm += p[i] * p[i];
628 // cout << "bfgsIter:\t" << bfgsIter << endl;
633 m->errorOut(e, "qFinderDMM", "bfgs2_Solver");
638 /**************************************************************************************************/
640 //can we get these psi/psi1 calculations into their own math class?
641 //psi calcualtions swiped from gsl library...
643 static const double psi_cs[23] = {
644 -.038057080835217922,
646 -.056815747821244730,
648 -.001333232857994342,
650 -.000037040238178456,
652 -.000001071263908506,
654 -.000000031353509361,
656 -.000000000921168141,
658 -.000000000027098646,
660 -.000000000000797527,
662 -.000000000000023475,
664 -.000000000000000691,
669 static double apsi_cs[16] = {
688 /**************************************************************************************************/
690 double qFinderDMM::cheb_eval(const double seriesData[], int order, double xx){
695 double x2 = xx * 2.0000;
697 for(int j=order;j>=1;j--){
698 if (m->control_pressed) { return 0; }
700 d = x2 * d - dd + seriesData[j];
704 d = xx * d - dd + 0.5 * seriesData[0];
708 m->errorOut(e, "qFinderDMM", "cheb_eval");
713 /**************************************************************************************************/
715 double qFinderDMM::psi(double xx){
717 double psiX = 0.0000;
721 double t1 = 1.0 / xx;
722 psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
726 else if(xx < 2.0000){
728 const double v = xx - 1.0;
729 psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
733 const double t = 8.0/(xx*xx)-1.0;
734 psiX = cheb_eval(apsi_cs, 15, t);
735 psiX += log(xx) - 0.5/xx;
741 m->errorOut(e, "qFinderDMM", "psi");
746 /**************************************************************************************************/
748 /* coefficients for Maclaurin summation in hzeta()
751 static double hzeta_c[15] = {
752 1.00000000000000000000000000000,
753 0.083333333333333333333333333333,
754 -0.00138888888888888888888888888889,
755 0.000033068783068783068783068783069,
756 -8.2671957671957671957671957672e-07,
757 2.0876756987868098979210090321e-08,
758 -5.2841901386874931848476822022e-10,
759 1.3382536530684678832826980975e-11,
760 -3.3896802963225828668301953912e-13,
761 8.5860620562778445641359054504e-15,
762 -2.1748686985580618730415164239e-16,
763 5.5090028283602295152026526089e-18,
764 -1.3954464685812523340707686264e-19,
765 3.5347070396294674716932299778e-21,
766 -8.9535174270375468504026113181e-23
769 /**************************************************************************************************/
771 double qFinderDMM::psi1(double xx){
774 /* Euler-Maclaurin summation formula
775 * [Moshier, p. 400, with several typo corrections]
782 const double pmax = pow(kmax + xx, -s);
784 double pcp = pmax / (kmax + xx);
785 double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
787 for(k=0; k<kmax; k++) {
788 if (m->control_pressed) { return 0; }
789 value += pow(k + xx, -s);
792 for(j=0; j<=jmax; j++) {
793 if (m->control_pressed) { return 0; }
794 double delta = hzeta_c[j+1] * scp * pcp;
797 if(fabs(delta/value) < 0.5*EPSILON) break;
799 scp *= (s+2*j+1)*(s+2*j+2);
800 pcp /= (kmax + xx)*(kmax + xx);
806 m->errorOut(e, "qFinderDMM", "psi1");
811 /**************************************************************************************************/
813 double qFinderDMM::negativeLogEvidenceLambdaPi(vector<double>& x){
815 vector<double> sumAlphaX(numSamples, 0.0000);
817 double logEAlpha = 0.0000;
818 double sumLambda = 0.0000;
819 double sumAlpha = 0.0000;
820 double logE = 0.0000;
822 double eta = 0.10000;
824 double weight = 0.00000;
825 for(int i=0;i<numSamples;i++){
826 weight += zMatrix[currentPartition][i];
829 for(int i=0;i<numOTUs;i++){
830 if (m->control_pressed) { return 0; }
831 double lambda = x[i];
832 double alpha = exp(x[i]);
833 logEAlpha += lgamma(alpha);
837 for(int j=0;j<numSamples;j++){
838 double X = countMatrix[j][i];
839 double alphaX = alpha + X;
840 sumAlphaX[j] += alphaX;
842 logE -= zMatrix[currentPartition][j] * lgamma(alphaX);
846 logEAlpha -= lgamma(sumAlpha);
848 for(int i=0;i<numSamples;i++){
849 logE += zMatrix[currentPartition][i] * lgamma(sumAlphaX[i]);
852 return logE + weight * logEAlpha + nu * sumAlpha - eta * sumLambda;
855 m->errorOut(e, "qFinderDMM", "negativeLogEvidenceLambdaPi");
860 /**************************************************************************************************/
862 void qFinderDMM::negativeLogDerivEvidenceLambdaPi(vector<double>& x, vector<double>& df){
864 // cout << "\tstart negativeLogDerivEvidenceLambdaPi" << endl;
866 vector<double> storeVector(numSamples, 0.0000);
867 vector<double> derivative(numOTUs, 0.0000);
868 vector<double> alpha(numOTUs, 0.0000);
870 double store = 0.0000;
874 double weight = 0.0000;
875 for(int i=0;i<numSamples;i++){
876 weight += zMatrix[currentPartition][i];
880 for(int i=0;i<numOTUs;i++){
881 if (m->control_pressed) { return; }
882 // cout << "start i loop" << endl;
884 // cout << i << '\t' << alpha[i] << '\t' << x[i] << '\t' << exp(x[i]) << '\t' << store << endl;
886 alpha[i] = exp(x[i]);
889 // cout << "before derivative" << endl;
891 derivative[i] = weight * psi(alpha[i]);
893 // cout << "after derivative" << endl;
895 // cout << i << '\t' << alpha[i] << '\t' << psi(alpha[i]) << '\t' << derivative[i] << endl;
897 for(int j=0;j<numSamples;j++){
898 double X = countMatrix[j][i];
899 double alphaX = X + alpha[i];
901 derivative[i] -= zMatrix[currentPartition][j] * psi(alphaX);
902 storeVector[j] += alphaX;
904 // cout << "end i loop" << endl;
907 double sumStore = 0.0000;
908 for(int i=0;i<numSamples;i++){
909 sumStore += zMatrix[currentPartition][i] * psi(storeVector[i]);
912 store = weight * psi(store);
914 df.resize(numOTUs, 0.0000);
916 for(int i=0;i<numOTUs;i++){
917 df[i] = alpha[i] * (nu + derivative[i] - store + sumStore) - eta;
918 // cout << i << '\t' << df[i] << endl;
920 // cout << df.size() << endl;
921 // cout << "\tend negativeLogDerivEvidenceLambdaPi" << endl;
924 m->errorOut(e, "qFinderDMM", "negativeLogDerivEvidenceLambdaPi");
929 /**************************************************************************************************/
931 double qFinderDMM::getNegativeLogEvidence(vector<double>& lambda, int group){
933 double sumAlpha = 0.0000;
934 double sumAlphaX = 0.0000;
935 double sumLnGamAlpha = 0.0000;
936 double logEvidence = 0.0000;
938 for(int i=0;i<numOTUs;i++){
939 if (m->control_pressed) { return 0; }
940 double alpha = exp(lambda[i]);
941 double X = countMatrix[group][i];
942 double alphaX = alpha + X;
944 sumLnGamAlpha += lgamma(alpha);
948 logEvidence -= lgamma(alphaX);
951 sumLnGamAlpha -= lgamma(sumAlpha);
952 logEvidence += lgamma(sumAlphaX);
954 return logEvidence + sumLnGamAlpha;
957 m->errorOut(e, "qFinderDMM", "getNegativeLogEvidence");
962 /**************************************************************************************************/
964 void qFinderDMM::kMeans(){
967 vector<vector<double> > relativeAbundance(numSamples);
968 vector<vector<double> > alphaMatrix;
970 alphaMatrix.resize(numPartitions);
971 lambdaMatrix.resize(numPartitions);
972 for(int i=0;i<numPartitions;i++){
973 alphaMatrix[i].assign(numOTUs, 0);
974 lambdaMatrix[i].assign(numOTUs, 0);
977 //get relative abundance
978 for(int i=0;i<numSamples;i++){
979 if (m->control_pressed) { return; }
982 relativeAbundance[i].assign(numOTUs, 0.0);
984 for(int j=0;j<numOTUs;j++){
985 groupTotal += countMatrix[i][j];
987 for(int j=0;j<numOTUs;j++){
988 relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
992 //randomly assign samples into partitions
993 zMatrix.resize(numPartitions);
994 for(int i=0;i<numPartitions;i++){
995 zMatrix[i].assign(numSamples, 0);
998 for(int i=0;i<numSamples;i++){
999 zMatrix[rand()%numPartitions][i] = 1;
1002 double maxChange = 1;
1003 int maxIters = 1000;
1006 weights.assign(numPartitions, 0);
1008 while(maxChange > 1e-6 && iteration < maxIters){
1010 if (m->control_pressed) { return; }
1011 //calcualte average relative abundance
1013 for(int i=0;i<numPartitions;i++){
1015 double normChange = 0.0;
1019 for(int j=0;j<numSamples;j++){
1020 weights[i] += (double)zMatrix[i][j];
1023 vector<double> averageRelativeAbundance(numOTUs, 0);
1024 for(int j=0;j<numOTUs;j++){
1025 for(int k=0;k<numSamples;k++){
1026 averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
1030 for(int j=0;j<numOTUs;j++){
1031 averageRelativeAbundance[j] /= weights[i];
1032 double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
1033 normChange += difference * difference;
1034 alphaMatrix[i][j] = averageRelativeAbundance[j];
1037 normChange = sqrt(normChange);
1039 if(normChange > maxChange){ maxChange = normChange; }
1043 //calcualte distance between each sample in partition adn the average relative abundance
1044 for(int i=0;i<numSamples;i++){
1045 if (m->control_pressed) { return; }
1047 double normalizationFactor = 0;
1048 vector<double> totalDistToPartition(numPartitions, 0);
1050 for(int j=0;j<numPartitions;j++){
1051 for(int k=0;k<numOTUs;k++){
1052 double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
1053 totalDistToPartition[j] += difference * difference;
1055 totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
1056 normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
1060 for(int j=0;j<numPartitions;j++){
1061 zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
1067 // cout << "K means: " << iteration << '\t' << maxChange << endl;
1071 // cout << "Iter:-1";
1072 for(int i=0;i<numPartitions;i++){
1073 weights[i] = 0.0000;
1075 for(int j=0;j<numSamples;j++){
1076 weights[i] += zMatrix[i][j];
1078 // printf("\tw_%d=%.3f", i, weights[i]);
1083 for(int i=0;i<numOTUs;i++){
1084 if (m->control_pressed) { return; }
1085 for(int j=0;j<numPartitions;j++){
1086 if(alphaMatrix[j][i] > 0){
1087 lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
1090 lambdaMatrix[j][i] = -10.0;
1095 catch(exception& e){
1096 m->errorOut(e, "qFinderDMM", "kMeans");
1101 /**************************************************************************************************/
1103 void qFinderDMM::optimizeLambda(){
1105 for(currentPartition=0;currentPartition<numPartitions;currentPartition++){
1106 if (m->control_pressed) { return; }
1107 bfgs2_Solver(lambdaMatrix[currentPartition]);
1110 catch(exception& e){
1111 m->errorOut(e, "qFinderDMM", "optimizeLambda");
1116 /**************************************************************************************************/
1118 void qFinderDMM::calculatePiK(){
1120 vector<double> store(numPartitions);
1122 for(int i=0;i<numSamples;i++){
1123 if (m->control_pressed) { return; }
1124 double sum = 0.0000;
1125 double minNegLogEvidence =numeric_limits<double>::max();
1127 for(int j=0;j<numPartitions;j++){
1128 double negLogEvidenceJ = getNegativeLogEvidence(lambdaMatrix[j], i);
1130 if(negLogEvidenceJ < minNegLogEvidence){
1131 minNegLogEvidence = negLogEvidenceJ;
1133 store[j] = negLogEvidenceJ;
1136 for(int j=0;j<numPartitions;j++){
1137 if (m->control_pressed) { return; }
1138 zMatrix[j][i] = weights[j] * exp(-(store[j] - minNegLogEvidence));
1139 sum += zMatrix[j][i];
1142 for(int j=0;j<numPartitions;j++){
1143 zMatrix[j][i] /= sum;
1148 catch(exception& e){
1149 m->errorOut(e, "qFinderDMM", "calculatePiK");
1155 /**************************************************************************************************/
1157 double qFinderDMM::getNegativeLogLikelihood(){
1159 double eta = 0.10000;
1160 double nu = 0.10000;
1162 vector<double> pi(numPartitions, 0.0000);
1163 vector<double> logBAlpha(numPartitions, 0.0000);
1165 double doubleSum = 0.0000;
1167 for(int i=0;i<numPartitions;i++){
1168 if (m->control_pressed) { return 0; }
1169 double sumAlphaK = 0.0000;
1171 pi[i] = weights[i] / (double)numSamples;
1173 for(int j=0;j<numOTUs;j++){
1174 double alpha = exp(lambdaMatrix[i][j]);
1177 logBAlpha[i] += lgamma(alpha);
1179 logBAlpha[i] -= lgamma(sumAlphaK);
1182 for(int i=0;i<numSamples;i++){
1183 if (m->control_pressed) { return 0; }
1185 double probability = 0.0000;
1186 double factor = 0.0000;
1187 double sum = 0.0000;
1188 vector<double> logStore(numPartitions, 0.0000);
1189 double offset = -numeric_limits<double>::max();
1191 for(int j=0;j<numOTUs;j++){
1192 sum += countMatrix[i][j];
1193 factor += lgamma(countMatrix[i][j] + 1.0000);
1195 factor -= lgamma(sum + 1.0);
1197 for(int k=0;k<numPartitions;k++){
1199 double sumAlphaKX = 0.0000;
1200 double logBAlphaX = 0.0000;
1202 for(int j=0;j<numOTUs;j++){
1203 double alphaX = exp(lambdaMatrix[k][j]) + (double)countMatrix[i][j];
1205 sumAlphaKX += alphaX;
1206 logBAlphaX += lgamma(alphaX);
1209 logBAlphaX -= lgamma(sumAlphaKX);
1211 logStore[k] = logBAlphaX - logBAlpha[k] - factor;
1212 if(logStore[k] > offset){
1213 offset = logStore[k];
1218 for(int k=0;k<numPartitions;k++){
1219 probability += pi[k] * exp(-offset + logStore[k]);
1221 doubleSum += log(probability) + offset;
1225 double L5 = - numOTUs * numPartitions * lgamma(eta);
1226 double L6 = eta * numPartitions * numOTUs * log(nu);
1228 double alphaSum, lambdaSum;
1229 alphaSum = lambdaSum = 0.0000;
1231 for(int i=0;i<numPartitions;i++){
1232 for(int j=0;j<numOTUs;j++){
1233 if (m->control_pressed) { return 0; }
1234 alphaSum += exp(lambdaMatrix[i][j]);
1235 lambdaSum += lambdaMatrix[i][j];
1241 return (-doubleSum - L5 - L6 - alphaSum - lambdaSum);
1243 catch(exception& e){
1244 m->errorOut(e, "qFinderDMM", "getNegativeLogLikelihood");
1251 /**************************************************************************************************/
1253 vector<vector<double> > qFinderDMM::getHessian(){
1255 vector<double> alpha(numOTUs, 0.0000);
1256 double alphaSum = 0.0000;
1258 vector<double> pi = zMatrix[currentPartition];
1259 vector<double> psi_ajk(numOTUs, 0.0000);
1260 vector<double> psi_cjk(numOTUs, 0.0000);
1261 vector<double> psi1_ajk(numOTUs, 0.0000);
1262 vector<double> psi1_cjk(numOTUs, 0.0000);
1264 for(int j=0;j<numOTUs;j++){
1266 if (m->control_pressed) { break; }
1268 alpha[j] = exp(lambdaMatrix[currentPartition][j]);
1269 alphaSum += alpha[j];
1271 for(int i=0;i<numSamples;i++){
1272 double X = (double) countMatrix[i][j];
1274 psi_ajk[j] += pi[i] * psi(alpha[j]);
1275 psi1_ajk[j] += pi[i] * psi1(alpha[j]);
1277 psi_cjk[j] += pi[i] * psi(alpha[j] + X);
1278 psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
1283 double psi_Ck = 0.0000;
1284 double psi1_Ck = 0.0000;
1286 double weight = 0.0000;
1288 for(int i=0;i<numSamples;i++){
1289 if (m->control_pressed) { break; }
1291 double sum = 0.0000;
1292 for(int j=0;j<numOTUs;j++){ sum += alpha[j] + countMatrix[i][j]; }
1294 psi_Ck += pi[i] * psi(sum);
1295 psi1_Ck += pi[i] * psi1(sum);
1298 double psi_Ak = weight * psi(alphaSum);
1299 double psi1_Ak = weight * psi1(alphaSum);
1301 vector<vector<double> > hessian(numOTUs);
1302 for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
1304 for(int i=0;i<numOTUs;i++){
1305 if (m->control_pressed) { break; }
1306 double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
1307 double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
1308 double term3 = 0.1 * alpha[i];
1310 hessian[i][i] = term1 + term2 + term3;
1312 for(int j=0;j<i;j++){
1313 hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
1314 hessian[j][i] = hessian[i][j];
1320 catch(exception& e){
1321 m->errorOut(e, "qFinderDMM", "getHessian");
1326 /**************************************************************************************************/