5 // Created by SarahsWork on 12/3/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "communitytype.h"
11 /**************************************************************************************************/
13 //can we get these psi/psi1 calculations into their own math class?
14 //psi calcualtions swiped from gsl library...
16 static const double psi_cs[23] = {
42 static double apsi_cs[16] = {
61 /**************************************************************************************************/
62 /* coefficients for Maclaurin summation in hzeta()
65 static double hzeta_c[15] = {
66 1.00000000000000000000000000000,
67 0.083333333333333333333333333333,
68 -0.00138888888888888888888888888889,
69 0.000033068783068783068783068783069,
70 -8.2671957671957671957671957672e-07,
71 2.0876756987868098979210090321e-08,
72 -5.2841901386874931848476822022e-10,
73 1.3382536530684678832826980975e-11,
74 -3.3896802963225828668301953912e-13,
75 8.5860620562778445641359054504e-15,
76 -2.1748686985580618730415164239e-16,
77 5.5090028283602295152026526089e-18,
78 -1.3954464685812523340707686264e-19,
79 3.5347070396294674716932299778e-21,
80 -8.9535174270375468504026113181e-23
83 /**************************************************************************************************/
84 void CommunityTypeFinder::printSilData(ofstream& out, double chi, vector<double> sils){
86 out << setprecision (6) << numPartitions << '\t' << chi << '\t';
87 for (int i = 0; i < sils.size(); i++) {
88 out << sils[i] << '\t';
95 m->errorOut(e, "CommunityTypeFinder", "printSilData");
99 /**************************************************************************************************/
100 void CommunityTypeFinder::printSilData(ostream& out, double chi, vector<double> sils){
102 out << setprecision (6) << numPartitions << '\t' << chi << '\t';
103 m->mothurOutJustToLog(toString(numPartitions) + '\t' + toString(chi) + '\t');
104 for (int i = 0; i < sils.size(); i++) {
105 out << sils[i] << '\t';
106 m->mothurOutJustToLog(toString(sils[i]) + '\t');
109 m->mothurOutJustToLog("\n");
114 m->errorOut(e, "CommunityTypeFinder", "printSilData");
118 /**************************************************************************************************/
120 void CommunityTypeFinder::printZMatrix(string fileName, vector<string> sampleName){
122 ofstream printMatrix;
123 m->openOutputFile(fileName, printMatrix); //(fileName.c_str());
124 printMatrix.setf(ios::fixed, ios::floatfield);
125 printMatrix.setf(ios::showpoint);
127 for(int i=0;i<numPartitions;i++){ printMatrix << "\tPartition_" << i+1; } printMatrix << endl;
129 for(int i=0;i<numSamples;i++){
130 printMatrix << sampleName[i];
131 for(int j=0;j<numPartitions;j++){
132 printMatrix << setprecision(4) << '\t' << zMatrix[j][i];
138 catch(exception& e) {
139 m->errorOut(e, "CommunityTypeFinder", "printZMatrix");
144 /**************************************************************************************************/
146 void CommunityTypeFinder::printRelAbund(string fileName, vector<string> otuNames){
149 m->openOutputFile(fileName, printRA); //(fileName.c_str());
150 printRA.setf(ios::fixed, ios::floatfield);
151 printRA.setf(ios::showpoint);
153 vector<double> totals(numPartitions, 0.0000);
154 for(int i=0;i<numPartitions;i++){
155 for(int j=0;j<numOTUs;j++){
156 totals[i] += exp(lambdaMatrix[i][j]);
161 for(int i=0;i<numPartitions;i++){
162 printRA << "\tPartition_" << i+1 << '_' << setprecision(4) << totals[i];
163 printRA << "\tPartition_" << i+1 <<"_LCI" << "\tPartition_" << i+1 << "_UCI";
167 for(int i=0;i<numOTUs;i++){
169 if (m->control_pressed) { break; }
171 printRA << otuNames[i];
172 for(int j=0;j<numPartitions;j++){
174 if(error[j][i] >= 0.0000){
175 double std = sqrt(error[j][i]);
176 printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
177 printRA << '\t' << 100 * exp(lambdaMatrix[j][i] - 2.0 * std) / totals[j];
178 printRA << '\t' << 100 * exp(lambdaMatrix[j][i] + 2.0 * std) / totals[j];
181 printRA << '\t' << 100 * exp(lambdaMatrix[j][i]) / totals[j];
182 printRA << '\t' << "NA";
183 printRA << '\t' << "NA";
191 catch(exception& e) {
192 m->errorOut(e, "CommunityTypeFinder", "printRelAbund");
197 /**************************************************************************************************/
199 vector<vector<double> > CommunityTypeFinder::getHessian(){
201 vector<double> alpha(numOTUs, 0.0000);
202 double alphaSum = 0.0000;
204 vector<double> pi = zMatrix[currentPartition];
205 vector<double> psi_ajk(numOTUs, 0.0000);
206 vector<double> psi_cjk(numOTUs, 0.0000);
207 vector<double> psi1_ajk(numOTUs, 0.0000);
208 vector<double> psi1_cjk(numOTUs, 0.0000);
210 for(int j=0;j<numOTUs;j++){
212 if (m->control_pressed) { break; }
214 alpha[j] = exp(lambdaMatrix[currentPartition][j]);
215 alphaSum += alpha[j];
217 for(int i=0;i<numSamples;i++){
218 double X = (double) countMatrix[i][j];
220 psi_ajk[j] += pi[i] * psi(alpha[j]);
221 psi1_ajk[j] += pi[i] * psi1(alpha[j]);
223 psi_cjk[j] += pi[i] * psi(alpha[j] + X);
224 psi1_cjk[j] += pi[i] * psi1(alpha[j] + X);
229 double psi_Ck = 0.0000;
230 double psi1_Ck = 0.0000;
232 double weight = 0.0000;
234 for(int i=0;i<numSamples;i++){
235 if (m->control_pressed) { break; }
238 for(int j=0;j<numOTUs;j++){ sum += alpha[j] + countMatrix[i][j]; }
240 psi_Ck += pi[i] * psi(sum);
241 psi1_Ck += pi[i] * psi1(sum);
244 double psi_Ak = weight * psi(alphaSum);
245 double psi1_Ak = weight * psi1(alphaSum);
247 vector<vector<double> > hessian(numOTUs);
248 for(int i=0;i<numOTUs;i++){ hessian[i].assign(numOTUs, 0.0000); }
250 for(int i=0;i<numOTUs;i++){
251 if (m->control_pressed) { break; }
252 double term1 = -alpha[i] * (- psi_ajk[i] + psi_Ak + psi_cjk[i] - psi_Ck);
253 double term2 = -alpha[i] * alpha[i] * (-psi1_ajk[i] + psi1_Ak + psi1_cjk[i] - psi1_Ck);
254 double term3 = 0.1 * alpha[i];
256 hessian[i][i] = term1 + term2 + term3;
258 for(int j=0;j<i;j++){
259 hessian[i][j] = - alpha[i] * alpha[j] * (psi1_Ak - psi1_Ck);
260 hessian[j][i] = hessian[i][j];
267 m->errorOut(e, "CommunityTypeFinder", "getHessian");
271 /**************************************************************************************************/
273 double CommunityTypeFinder::psi1(double xx){
276 /* Euler-Maclaurin summation formula
277 * [Moshier, p. 400, with several typo corrections]
284 const double pmax = pow(kmax + xx, -s);
286 double pcp = pmax / (kmax + xx);
287 double value = pmax*((kmax+xx)/(s-1.0) + 0.5);
289 for(k=0; k<kmax; k++) {
290 if (m->control_pressed) { return 0; }
291 value += pow(k + xx, -s);
294 for(j=0; j<=jmax; j++) {
295 if (m->control_pressed) { return 0; }
296 double delta = hzeta_c[j+1] * scp * pcp;
299 if(fabs(delta/value) < 0.5*EPSILON) break;
301 scp *= (s+2*j+1)*(s+2*j+2);
302 pcp /= (kmax + xx)*(kmax + xx);
308 m->errorOut(e, "CommunityTypeFinder", "psi1");
313 /**************************************************************************************************/
315 double CommunityTypeFinder::psi(double xx){
317 double psiX = 0.0000;
321 double t1 = 1.0 / xx;
322 psiX = cheb_eval(psi_cs, 22, 2.0*xx-1.0);
326 else if(xx < 2.0000){
328 const double v = xx - 1.0;
329 psiX = cheb_eval(psi_cs, 22, 2.0*v-1.0);
333 const double t = 8.0/(xx*xx)-1.0;
334 psiX = cheb_eval(apsi_cs, 15, t);
335 psiX += log(xx) - 0.5/xx;
341 m->errorOut(e, "CommunityTypeFinder", "psi");
345 /**************************************************************************************************/
347 double CommunityTypeFinder::cheb_eval(const double seriesData[], int order, double xx){
352 double x2 = xx * 2.0000;
354 for(int j=order;j>=1;j--){
355 if (m->control_pressed) { return 0; }
357 d = x2 * d - dd + seriesData[j];
361 d = xx * d - dd + 0.5 * seriesData[0];
365 m->errorOut(e, "CommunityTypeFinder", "cheb_eval");
369 /**************************************************************************************************/
371 int CommunityTypeFinder::findkMeans(){
373 error.resize(numPartitions); for (int i = 0; i < numPartitions; i++) { error[i].resize(numOTUs, 0.0); }
374 vector<vector<double> > relativeAbundance(numSamples);
375 vector<vector<double> > alphaMatrix;
377 alphaMatrix.resize(numPartitions);
378 lambdaMatrix.resize(numPartitions);
379 for(int i=0;i<numPartitions;i++){
380 alphaMatrix[i].assign(numOTUs, 0);
381 lambdaMatrix[i].assign(numOTUs, 0);
384 //get relative abundance
385 for(int i=0;i<numSamples;i++){
386 if (m->control_pressed) { return 0; }
389 relativeAbundance[i].assign(numOTUs, 0.0);
391 for(int j=0;j<numOTUs;j++){
392 groupTotal += countMatrix[i][j];
394 for(int j=0;j<numOTUs;j++){
395 relativeAbundance[i][j] = countMatrix[i][j] / (double)groupTotal;
399 //randomly assign samples into partitions
400 zMatrix.resize(numPartitions);
401 for(int i=0;i<numPartitions;i++){
402 zMatrix[i].assign(numSamples, 0);
405 for(int i=0;i<numSamples;i++){
406 zMatrix[rand()%numPartitions][i] = 1;
409 double maxChange = 1;
413 weights.assign(numPartitions, 0);
415 while(maxChange > 1e-6 && iteration < maxIters){
417 if (m->control_pressed) { return 0; }
418 //calcualte average relative abundance
420 for(int i=0;i<numPartitions;i++){
422 double normChange = 0.0;
426 for(int j=0;j<numSamples;j++){
427 weights[i] += (double)zMatrix[i][j];
430 vector<double> averageRelativeAbundance(numOTUs, 0);
431 for(int j=0;j<numOTUs;j++){
432 for(int k=0;k<numSamples;k++){
433 averageRelativeAbundance[j] += zMatrix[i][k] * relativeAbundance[k][j];
437 for(int j=0;j<numOTUs;j++){
438 averageRelativeAbundance[j] /= weights[i];
439 double difference = averageRelativeAbundance[j] - alphaMatrix[i][j];
440 normChange += difference * difference;
441 alphaMatrix[i][j] = averageRelativeAbundance[j];
444 normChange = sqrt(normChange);
446 if(normChange > maxChange){ maxChange = normChange; }
450 //calcualte distance between each sample in partition and the average relative abundance
451 for(int i=0;i<numSamples;i++){
452 if (m->control_pressed) { return 0; }
454 double normalizationFactor = 0;
455 vector<double> totalDistToPartition(numPartitions, 0);
457 for(int j=0;j<numPartitions;j++){
458 for(int k=0;k<numOTUs;k++){
459 double difference = alphaMatrix[j][k] - relativeAbundance[i][k];
460 totalDistToPartition[j] += difference * difference;
462 totalDistToPartition[j] = sqrt(totalDistToPartition[j]);
463 normalizationFactor += exp(-50.0 * totalDistToPartition[j]);
467 for(int j=0;j<numPartitions;j++){
468 zMatrix[j][i] = exp(-50.0 * totalDistToPartition[j]) / normalizationFactor;
474 // cout << "K means: " << iteration << '\t' << maxChange << endl;
478 // cout << "Iter:-1";
479 for(int i=0;i<numPartitions;i++){
482 for(int j=0;j<numSamples;j++){
483 weights[i] += zMatrix[i][j];
485 // printf("\tw_%d=%.3f", i, weights[i]);
490 for(int i=0;i<numOTUs;i++){
491 if (m->control_pressed) { return 0; }
492 for(int j=0;j<numPartitions;j++){
493 if(alphaMatrix[j][i] > 0){
494 lambdaMatrix[j][i] = log(alphaMatrix[j][i]);
497 lambdaMatrix[j][i] = -10.0;
505 m->errorOut(e, "CommunityTypeFinder", "kMeans");
511 /**************************************************************************************************/