double commSize = 1e20;
double sampleSize = rank->getNumSeqs();
- double aux = ceil(pow(sampleSize+1, (1/(double)3)));
- double est0 = rank->get(1)+1;
- if (aux > est0) { est0 = aux; } //est0 = max(rank->get(1)+1, aux)
+ vector<int> freqx;
+ vector<int> freqy;
+ for (int i = 1; i <=rank->getMaxRank(); i++) {
+ int abund = rank->get(i);
+ if (abund != 0) {
+ freqx.push_back(i);
+ freqy.push_back(abund);
+ }
+ }
+
+ double aux = ceil(pow((sampleSize+1), (1/(double)3)));
+ double est0 = max(freqy[0]+1, aux);
vector<double> ests;
double numr = 0.0;
- for (int i = 1; i < rank->getNumBins()-1; i++) {
+ double denr = 0.0;
+ for (int i = 0; i < freqx.size()-1; i++) {
if (m->control_pressed) { break; }
- int abund = rank->get(i);
-
- if (abund != 0) {
+ if (freqx[i+1] == freqx[i]+1) { numr = max(freqy[i+1]+1, aux); }
+ else { numr = aux; }
- int abundNext = rank->get(i+1);
- if (abundNext == 0) { numr = aux; }
- else {
- if (abundNext+1 > aux) { numr = abundNext+1; } //numr = max(abundNext+1, aux)
- else { numr = aux; }
- }
- double denr = aux;
- if (abund > aux) { denr = abund; } //denr = max(abund, aux)
- ests.push_back((abund+1)*numr/denr);
- }
+ denr = max(freqy[i], aux);
+ ests.push_back((freqx[i]+1)*numr/(double)denr);
}
numr = aux;
+ denr = max(freqy[freqy.size()-1], aux);
+ ests.push_back((freqx[freqx.size()-1]+1)*numr/(double)denr);
-
- if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
+ double sum = 0.0;
+ for (int i = 0; i < freqy.size(); i++) { sum += (ests[i]*freqy[i]); }
+ double nfac = est0 + sum;
+ est0 /= nfac;
+
+ for (int i = 0; i < ests.size(); i++) { ests[i] /= nfac; }
+
+ double abunup = 1 / commSize;
+ double nbrup = est0 / abunup;
+ double abunlow = ests[0];
+ double nbrlow = est0 / abunlow;
+
+ if (alpha == 1) {
+ double sum = 0.0;
+ for (int i = 0; i < freqy.size(); i++) {
+ if (m->control_pressed) { break; }
+ sum += (freqy[i] * ests[i] * log(ests[i]));
+ }
+ data[0] = -sum;
+ data[1] = exp(data[0]+nbrlow*(-abunlow*log(abunlow)));
+ data[2] = exp(data[0]+nbrup*(-abunup*log(abunup)));
+ }else {
+ for (int i = 0; i < freqy.size(); i++) {
+ if (m->control_pressed) { break; }
+ data[0] += (freqy[i] * (pow(ests[i],alpha)));
+ }
+ data[1] = pow(data[0]+nbrup*pow(abunup,alpha), (1/(1-alpha)));
+ data[2] = pow(data[0]+nbrlow*pow(abunlow,alpha), (1/(1-alpha)));
+ }
+
+ //this calc has no data[0], just a lower and upper estimate. set data[0] to lower estimate.
+ data[0] = data[1];
+ if (data[1] > data[2]) { data[1] = data[2]; data[2] = data[0]; }
+ data[0] = data[1];
+
+ if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
return data;
}