Get the R code source of the hypergeometric distribution

The code is deactivated with "#if 0" to allow ALTree compilation

Get the R code source of the hypergeometric distribution
32b13be8 · Vincent Danjean · 88e5972b · 32b13be8
Commit 32b13be8 authored 12 years ago by Vincent Danjean
--- a/CUtils/c_sources/rhyper.c
+++ b/CUtils/c_sources/rhyper.c
+#if 0
+/*
+ *  Mathlib : A C Library of Special Functions
+ *  Copyright (C) 1998 Ross Ihaka
+ *  Copyright (C) 2000-2012 The R Core Team
+ *  Copyright (C) 2005	The R Foundation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, a copy is available at
+ *  http://www.r-project.org/Licenses/
+ *
+ *  SYNOPSIS
+ *
+ *    #include <Rmath.h>
+ *    double rhyper(double NR, double NB, double n);
+ *
+ *  DESCRIPTION
+ *
+ *    Random variates from the hypergeometric distribution.
+ *    Returns the number of white balls drawn when kk balls
+ *    are drawn at random from an urn containing nn1 white
+ *    and nn2 black balls.
+ *
+ *  REFERENCE
+ *
+ *    V. Kachitvichyanukul and B. Schmeiser (1985).
+ *    ``Computer generation of hypergeometric random variates,''
+ *    Journal of Statistical Computation and Simulation 22, 127-145.
+ *
+ *    The original algorithm had a bug -- R bug report PR#7314 --
+ *    giving numbers slightly too small in case III h2pe
+ *    where (m < 100 || ix <= 50) , see below.
+ */
+
+#include "nmath.h"
+
+/* afc(i) :=  ln( i! )	[logarithm of the factorial i.
+ *	   If (i > 7), use Stirling's approximation, otherwise use table lookup.
+*/
+
+static double afc(int i)
+{
+    const static double al[9] =
+    {
+	0.0,
+	0.0,/*ln(0!)=ln(1)*/
+	0.0,/*ln(1!)=ln(1)*/
+	0.69314718055994530941723212145817,/*ln(2) */
+	1.79175946922805500081247735838070,/*ln(6) */
+	3.17805383034794561964694160129705,/*ln(24)*/
+	4.78749174278204599424770093452324,
+	6.57925121201010099506017829290394,
+	8.52516136106541430016553103634712
+	/*, 10.60460290274525022841722740072165*/
+    };
+    double di, value;
+
+    if (i < 0) {
+      MATHLIB_WARNING(("rhyper.c: afc(i), i=%d < 0 -- SHOULD NOT HAPPEN!\n"),
+		      i);
+      return -1;/* unreached (Wall) */
+    } else if (i <= 7) {
+	value = al[i + 1];
+    } else {
+	di = i;
+	value = (di + 0.5) * log(di) - di + 0.08333333333333 / di
+	    - 0.00277777777777 / di / di / di + 0.9189385332;
+    }
+    return value;
+}
+
+double rhyper(double nn1in, double nn2in, double kkin)
+{
+    const static double con = 57.56462733;
+    const static double deltal = 0.0078;
+    const static double deltau = 0.0034;
+    const static double scale = 1e25;
+
+    /* extern double afc(int); */
+
+    int nn1, nn2, kk;
+    int i, ix;
+    Rboolean reject, setup1, setup2;
+
+    double e, f, g, p, r, t, u, v, y;
+    double de, dg, dr, ds, dt, gl, gu, nk, nm, ub;
+    double xk, xm, xn, y1, ym, yn, yk, alv;
+
+    /* These should become `thread_local globals' : */
+    static int ks = -1;
+    static int n1s = -1, n2s = -1;
+
+    static int k, m;
+    static int minjx, maxjx, n1, n2;
+
+    static double a, d, s, w;
+    static double tn, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3;
+
+
+    /* check parameter validity */
+
+    if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin))
+	ML_ERR_return_NAN;
+
+    nn1 = (int) floor(nn1in+0.5);
+    nn2 = (int) floor(nn2in+0.5);
+    kk	= (int) floor(kkin +0.5);
+
+    if (nn1 < 0 || nn2 < 0 || kk < 0 || kk > nn1 + nn2)
+	ML_ERR_return_NAN;
+
+    /* if new parameter values, initialize */
+    reject = TRUE;
+    if (nn1 != n1s || nn2 != n2s) {
+	setup1 = TRUE;	setup2 = TRUE;
+    } else if (kk != ks) {
+	setup1 = FALSE;	setup2 = TRUE;
+    } else {
+	setup1 = FALSE;	setup2 = FALSE;
+    }
+    if (setup1) {
+	n1s = nn1;
+	n2s = nn2;
+	tn = nn1 + nn2;
+	if (nn1 <= nn2) {
+	    n1 = nn1;
+	    n2 = nn2;
+	} else {
+	    n1 = nn2;
+	    n2 = nn1;
+	}
+    }
+    if (setup2) {
+	ks = kk;
+	if (kk + kk >= tn) {
+	    k = (int)(tn - kk);
+	} else {
+	    k = kk;
+	}
+    }
+    if (setup1 || setup2) {
+	m = (int) ((k + 1.0) * (n1 + 1.0) / (tn + 2.0));
+	minjx = imax2(0, k - n2);
+	maxjx = imin2(n1, k);
+    }
+    /* generate random variate --- Three basic cases */
+
+    if (minjx == maxjx) { /* I: degenerate distribution ---------------- */
+	ix = maxjx;
+	/* return ix;
+	   No, need to unmangle <TSL>*/
+	/* return appropriate variate */
+
+	if (kk + kk >= tn) {
+	  if (nn1 > nn2) {
+	    ix = kk - nn2 + ix;
+	  } else {
+	    ix = nn1 - ix;
+	  }
+	} else {
+	  if (nn1 > nn2)
+	    ix = kk - ix;
+	}
+	return ix;
+
+    } else if (m - minjx < 10) { /* II: inverse transformation ---------- */
+	if (setup1 || setup2) {
+	    if (k < n2) {
+		w = exp(con + afc(n2) + afc(n1 + n2 - k)
+			- afc(n2 - k) - afc(n1 + n2));
+	    } else {
+		w = exp(con + afc(n1) + afc(k)
+			- afc(k - n2) - afc(n1 + n2));
+	    }
+	}
+      L10:
+	p = w;
+	ix = minjx;
+	u = unif_rand() * scale;
+      L20:
+	if (u > p) {
+	    u -= p;
+	    p *= (n1 - ix) * (k - ix);
+	    ix++;
+	    p = p / ix / (n2 - k + ix);
+	    if (ix > maxjx)
+		goto L10;
+	    goto L20;
+	}
+    } else { /* III : h2pe --------------------------------------------- */
+
+	if (setup1 || setup2) {
+	    s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn);
+
+	    /* remark: d is defined in reference without int. */
+	    /* the truncation centers the cell boundaries at 0.5 */
+
+	    d = (int) (1.5 * s) + .5;
+	    xl = m - d + .5;
+	    xr = m + d + .5;
+	    a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m);
+	    kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl))
+		     - afc((int) (k - xl))
+		     - afc((int) (n2 - k + xl)));
+	    kr = exp(a - afc((int) (xr - 1))
+		     - afc((int) (n1 - xr + 1))
+		     - afc((int) (k - xr + 1))
+		     - afc((int) (n2 - k + xr - 1)));
+	    lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1));
+	    lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr));
+	    p1 = d + d;
+	    p2 = p1 + kl / lamdl;
+	    p3 = p2 + kr / lamdr;
+	}
+      L30:
+	u = unif_rand() * p3;
+	v = unif_rand();
+	if (u < p1) {		/* rectangular region */
+	    ix = (int) (xl + u);
+	} else if (u <= p2) {	/* left tail */
+	    ix = (int) (xl + log(v) / lamdl);
+	    if (ix < minjx)
+		goto L30;
+	    v = v * (u - p1) * lamdl;
+	} else {		/* right tail */
+	    ix = (int) (xr - log(v) / lamdr);
+	    if (ix > maxjx)
+		goto L30;
+	    v = v * (u - p2) * lamdr;
+	}
+
+	/* acceptance/rejection test */
+
+	if (m < 100 || ix <= 50) {
+	    /* explicit evaluation */
+	    /* The original algorithm (and TOMS 668) have
+		   f = f * i * (n2 - k + i) / (n1 - i) / (k - i);
+	       in the (m > ix) case, but the definition of the
+	       recurrence relation on p134 shows that the +1 is
+	       needed. */
+	    f = 1.0;
+	    if (m < ix) {
+		for (i = m + 1; i <= ix; i++)
+		    f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i;
+	    } else if (m > ix) {
+		for (i = ix + 1; i <= m; i++)
+		    f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1);
+	    }
+	    if (v <= f) {
+		reject = FALSE;
+	    }
+	} else {
+	    /* squeeze using upper and lower bounds */
+	    y = ix;
+	    y1 = y + 1.0;
+	    ym = y - m;
+	    yn = n1 - y + 1.0;
+	    yk = k - y + 1.0;
+	    nk = n2 - k + y1;
+	    r = -ym / y1;
+	    s = ym / yn;
+	    t = ym / yk;
+	    e = -ym / nk;
+	    g = yn * yk / (y1 * nk) - 1.0;
+	    dg = 1.0;
+	    if (g < 0.0)
+		dg = 1.0 + g;
+	    gu = g * (1.0 + g * (-0.5 + g / 3.0));
+	    gl = gu - .25 * (g * g * g * g) / dg;
+	    xm = m + 0.5;
+	    xn = n1 - m + 0.5;
+	    xk = k - m + 0.5;
+	    nm = n2 - k + xm;
+	    ub = y * gu - m * gl + deltau
+		+ xm * r * (1. + r * (-0.5 + r / 3.0))
+		+ xn * s * (1. + s * (-0.5 + s / 3.0))
+		+ xk * t * (1. + t * (-0.5 + t / 3.0))
+		+ nm * e * (1. + e * (-0.5 + e / 3.0));
+	    /* test against upper bound */
+	    alv = log(v);
+	    if (alv > ub) {
+		reject = TRUE;
+	    } else {
+				/* test against lower bound */
+		dr = xm * (r * r * r * r);
+		if (r < 0.0)
+		    dr /= (1.0 + r);
+		ds = xn * (s * s * s * s);
+		if (s < 0.0)
+		    ds /= (1.0 + s);
+		dt = xk * (t * t * t * t);
+		if (t < 0.0)
+		    dt /= (1.0 + t);
+		de = nm * (e * e * e * e);
+		if (e < 0.0)
+		    de /= (1.0 + e);
+		if (alv < ub - 0.25 * (dr + ds + dt + de)
+		    + (y + m) * (gl - gu) - deltal) {
+		    reject = FALSE;
+		}
+		else {
+		    /* * Stirling's formula to machine accuracy
+		     */
+		    if (alv <= (a - afc(ix) - afc(n1 - ix)
+				- afc(k - ix) - afc(n2 - k + ix))) {
+			reject = FALSE;
+		    } else {
+			reject = TRUE;
+		    }
+		}
+	    }
+	}
+	if (reject)
+	    goto L30;
+    }
+
+    /* return appropriate variate */
+
+    if (kk + kk >= tn) {
+	if (nn1 > nn2) {
+	    ix = kk - nn2 + ix;
+	} else {
+	    ix = nn1 - ix;
+	}
+    } else {
+	if (nn1 > nn2)
+	    ix = kk - ix;
+    }
+    return ix;
+}
+#endif