src/chi2.c

   1 /* chi2.c \r
   2           cc -o chi2 chi2.c -lm\r
   3           cl -O2 chi2.c\r
   4 \r
   5    This program calculates the chi-square significance values for given \r
   6    degrees of freedom and the tail probability (type I error rate) for \r
   7    given observed chi-square statistic and degree of freedom.\r
   8 \r
   9       Ziheng Yang,  October 1993.\r
  10 */\r
  11 \r
  12 #include <stdio.h>\r
  13 #include <stdlib.h>\r
  14 #include <math.h>\r
  15 \r
  16 double QuantileChi2 (double prob, double v);\r
  17 \r
  18 #define QuantileGamma(prob,alpha,beta) QuantileChi2(prob,2.0*(alpha))/(2.0*(beta))\r
  19 #define CDFGamma(x,alpha,beta) IncompleteGamma((beta)*(x),alpha,LnGammaFunction(alpha))\r
  20 #define CDFChi2(x,v) CDFGamma(x,(v)/2.0,0.5)\r
  21 \r
  22 double QuantileNormal (double prob);\r
  23 double CDFNormal (double x);\r
  24 double LnGammaFunction (double alpha);\r
  25 double IncompleteGamma (double x, double alpha, double ln_gamma_alpha);\r
  26 \r
  27 int main(int argc, char*argv[])\r
  28 {\r
  29    int i,j, n=20, ndf=200, nprob=8, option=0;\r
  30    double df, chi2, d=1.0/n, prob[]={.005, .025, .1, .5, .90, .95, .99, .999};\r
  31 \r
  32    if (argc!=2 && argc!=3) {\r
  33       printf ("\n\nChi-square critical values\n");\r
  34       for (i=0; i<ndf; i++) {\r
  35          if (i%15==0) {\r
  36             printf ("\n\t\t\t\tSignificance level\n");\r
  37             printf ("\n DF ");  \r
  38             for (j=0; j<nprob; j++) printf ("%9.4f", 1-prob[j]);\r
  39             printf ("\n");  \r
  40          }\r
  41          printf ("\n%3d ", i+1);\r
  42          for (j=0; j<nprob; j++)\r
  43             printf ("%9.4f", QuantileChi2(prob[j],(double)(i+1)));\r
  44          if (i%5==4) printf ("\n");\r
  45          if (i%15==14) {\r
  46             printf ("\nENTER for more, (q+ENTER) for quit... ");\r
  47             if (getchar()=='q') break;\r
  48          }\r
  49       }\r
  50    }\r
  51    else if(argc==2) {\r
  52       for (; ; ) {\r
  53          printf ("\nd.f. & Chi^2 value (Ctrl-c to break)? ");\r
  54          scanf ("%lf%lf", &df, &chi2);\r
  55          if(df<1 || chi2<0) break;\r
  56          prob[0] = 1-CDFChi2(chi2,df);\r
  57          printf ("\ndf = %2.0f  prob = %.9f = %.3e\n", df, prob[0], prob[0]);\r
  58       }\r
  59    }\r
  60    else if(argc==3) {\r
  61       df = atoi(argv[1]);\r
  62       chi2 = atof(argv[2]);\r
  63       if(df<1 || chi2<0) {\r
  64          printf("df = %d  ch2 = %.4f invalid", df, chi2);\r
  65          exit(-1);\r
  66       }\r
  67       prob[0] = 1 - CDFChi2(chi2, df);\r
  68       printf ("\ndf = %2.0f  prob = %.9f = %.3e\n", df, prob[0], prob[0]);\r
  69    }\r
  70    printf ("\n");\r
  71    return (0);\r
  72 }\r
  73 \r
  74 double QuantileNormal (double prob)\r
  75 {\r
  76 /* returns z so that Prob{x<z}=prob where x ~ N(0,1) and (1e-12)<prob<1-(1e-12)\r
  77    returns (-9999) if in error\r
  78    Odeh RE & Evans JO (1974) The percentage points of the normal distribution.\r
  79    Applied Statistics 22: 96-97 (AS70)\r
  80 */\r
  81    double a0=-.322232431088, a1=-1, a2=-.342242088547, a3=-.0204231210245;\r
  82    double a4=-.453642210148e-4, b0=.0993484626060, b1=.588581570495;\r
  83    double b2=.531103462366, b3=.103537752850, b4=.0038560700634;\r
  84    double y, z=0, p=prob, p1;\r
  85 \r
  86    p1 = (p<0.5 ? p : 1-p);\r
  87    if (p1<1e-20) return (-9999);\r
  88 \r
  89    y = sqrt (log(1/(p1*p1)));   \r
  90    z = y + ((((y*a4+a3)*y+a2)*y+a1)*y+a0) / ((((y*b4+b3)*y+b2)*y+b1)*y+b0);\r
  91    return (p<0.5 ? -z : z);\r
  92 }\r
  93 \r
  94 double CDFNormal (double x)\r
  95 {\r
  96 /*  Hill ID  (1973)  The normal integral.  Applied Statistics, 22:424-427.\r
  97     Algorithm AS 66.\r
  98     adapted by Z. Yang, March 1994.  Hill's routine is quite bad, and I \r
  99     haven't consulted \r
 100       Adams AG  (1969)  Algorithm 39.  Areas under the normal curve.\r
 101       Computer J. 12: 197-198.\r
 102 */\r
 103     int invers=0;\r
 104     double p, limit=10, t=1.28, y=x*x/2;\r
 105 \r
 106     if (x<0) {  invers=1;  x*=-1; }\r
 107     if (x>limit)  return (invers?0:1);\r
 108     if (x<1.28)  \r
 109        p = .5 - x * (    .398942280444 - .399903438504 * y\r
 110                    /(y + 5.75885480458 - 29.8213557808\r
 111                    /(y + 2.62433121679 + 48.6959930692\r
 112                    /(y + 5.92885724438))));\r
 113     else \r
 114        p = 0.398942280385 * exp(-y) /\r
 115            (x - 3.8052e-8 + 1.00000615302 /\r
 116            (x + 3.98064794e-4 + 1.98615381364 /\r
 117            (x - 0.151679116635 + 5.29330324926 /\r
 118            (x + 4.8385912808 - 15.1508972451 /\r
 119            (x + 0.742380924027 + 30.789933034 /\r
 120            (x + 3.99019417011))))));\r
 121 \r
 122     return  invers ? p : 1-p;\r
 123 }\r
 124 \r
 125 double LnGammaFunction (double alpha)\r
 126 {\r
 127 /* returns ln(gamma(alpha)) for alpha>0, accurate to 10 decimal places.  \r
 128    Stirling's formula is used for the central polynomial part of the procedure.\r
 129    Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function.\r
 130    Communications of the Association for Computing Machinery, 9:684\r
 131 */\r
 132    double x=alpha, f=0, z;\r
 133 \r
 134    if (x<7) {\r
 135        f=1;  z=x-1;\r
 136        while (++z<7)  f*=z;\r
 137        x=z;   f=-log(f);\r
 138    }\r
 139    z = 1/(x*x);\r
 140    return  f + (x-0.5)*log(x) - x + .918938533204673 \r
 141           + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z\r
 142                +.083333333333333)/x;  \r
 143 }\r
 144 \r
 145 double IncompleteGamma (double x, double alpha, double ln_gamma_alpha)\r
 146 {\r
 147 /* returns the incomplete gamma ratio I(x,alpha) where x is the upper \r
 148            limit of the integration and alpha is the shape parameter.\r
 149    returns (-1) if in error\r
 150    ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant.\r
 151    (1) series expansion     if (alpha>x || x<=1)\r
 152    (2) continued fraction   otherwise\r
 153    RATNEST FORTRAN by\r
 154    Bhattacharjee GP (1970) The incomplete gamma integral.  Applied Statistics,\r
 155    19: 285-287 (AS32)\r
 156 */\r
 157    int i;\r
 158    double p=alpha, g=ln_gamma_alpha;\r
 159    double accurate=1e-8, overflow=1e30;\r
 160    double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6];\r
 161 \r
 162    if (x==0) return (0);\r
 163    if (x<0 || p<=0) return (-1);\r
 164 \r
 165    factor=exp(p*log(x)-x-g);   \r
 166    if (x>1 && x>=p) goto l30;\r
 167    /* (1) series expansion */\r
 168    gin=1;  term=1;  rn=p;\r
 169  l20:\r
 170    rn++;\r
 171    term*=x/rn;   gin+=term;\r
 172 \r
 173    if (term > accurate) goto l20;\r
 174    gin*=factor/p;\r
 175    goto l50;\r
 176  l30:\r
 177    /* (2) continued fraction */\r
 178    a=1-p;   b=a+x+1;  term=0;\r
 179    pn[0]=1;  pn[1]=x;  pn[2]=x+1;  pn[3]=x*b;\r
 180    gin=pn[2]/pn[3];\r
 181  l32:\r
 182    a++;  b+=2;  term++;   an=a*term;\r
 183    for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i];\r
 184    if (pn[5] == 0) goto l35;\r
 185    rn=pn[4]/pn[5];   dif=fabs(gin-rn);\r
 186    if (dif>accurate) goto l34;\r
 187    if (dif<=accurate*rn) goto l42;\r
 188  l34:\r
 189    gin=rn;\r
 190  l35:\r
 191    for (i=0; i<4; i++) pn[i]=pn[i+2];\r
 192    if (fabs(pn[4]) < overflow) goto l32;\r
 193    for (i=0; i<4; i++) pn[i]/=overflow;\r
 194    goto l32;\r
 195  l42:\r
 196    gin=1-factor*gin;\r
 197 \r
 198  l50:\r
 199    return (gin);\r
 200 }\r
 201 \r
 202 \r
 203 double QuantileChi2 (double prob, double v)\r
 204 {\r
 205 /* returns z so that Prob{x<z}=prob where x is Chi2 distributed with df=v\r
 206    returns -1 if in error.   0.000002<prob<0.999998\r
 207    RATNEST FORTRAN by\r
 208        Best DJ & Roberts DE (1975) The percentage Quantiles of the \r
 209        Chi2 distribution.  Applied Statistics 24: 385-388.  (AS91)\r
 210    Converted into C by Ziheng Yang, Oct. 1993.\r
 211 */\r
 212    double e=.5e-6, aa=.6931471805, p=prob, g;\r
 213    double xx, c, ch, a=0,q=0,p1=0,p2=0,t=0,x=0,b=0,s1,s2,s3,s4,s5,s6;\r
 214 \r
 215    if (p<.000002 || p>.999998 || v<=0) return (-1);\r
 216 \r
 217    g = LnGammaFunction (v/2);\r
 218    xx=v/2;   c=xx-1;\r
 219    if (v >= -1.24*log(p)) goto l1;\r
 220 \r
 221    ch=pow((p*xx*exp(g+xx*aa)), 1/xx);\r
 222    if (ch-e<0) return (ch);\r
 223    goto l4;\r
 224 l1:\r
 225    if (v>.32) goto l3;\r
 226    ch=0.4;   a=log(1-p);\r
 227 l2:\r
 228    q=ch;  p1=1+ch*(4.67+ch);  p2=ch*(6.73+ch*(6.66+ch));\r
 229    t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2;\r
 230    ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t;\r
 231    if (fabs(q/ch-1)-.01 <= 0) goto l4;\r
 232    else                       goto l2;\r
 233   \r
 234 l3: \r
 235    x = QuantileNormal (p);\r
 236    p1=0.222222/v;   ch=v*pow((x*sqrt(p1)+1-p1), 3.0);\r
 237    if (ch>2.2*v+6)  ch=-2*(log(1-p)-c*log(.5*ch)+g);\r
 238 l4:\r
 239    q=ch;   p1=.5*ch;\r
 240    if ((t=IncompleteGamma (p1, xx, g))<0) {\r
 241       printf ("\nerr IncompleteGamma");\r
 242       return (-1);\r
 243    }\r
 244    p2=p-t;\r
 245    t=p2*exp(xx*aa+g+p1-c*log(ch));   \r
 246    b=t/ch;  a=0.5*t-b*c;\r
 247 \r
 248    s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420;\r
 249    s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520;\r
 250    s3=(210+a*(462+a*(707+932*a)))/2520;\r
 251    s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040;\r
 252    s5=(84+264*a+c*(175+606*a))/2520;\r
 253    s6=(120+c*(346+127*c))/5040;\r
 254    ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6))))));\r
 255    if (fabs(q/ch-1) > e) goto l4;\r
 256 \r
 257    return (ch);\r
 258 }\r