RandomLib  1.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Friends Macros Pages
DiscreteNormal.hpp
Go to the documentation of this file.
1 /**
2  * \file DiscreteNormal.hpp
3  * \brief Header for DiscreteNormal
4  *
5  * Sample exactly from the discrete normal distribution.
6  *
7  * Copyright (c) Charles Karney (2013) <charles@karney.com> and licensed
8  * under the MIT/X11 License. For more information, see
9  * http://randomlib.sourceforge.net/
10  **********************************************************************/
11 
12 #if !defined(RANDOMLIB_DISCRETENORMAL_HPP)
13 #define RANDOMLIB_DISCRETENORMAL_HPP 1
14 
15 #include <vector>
16 #include <limits>
17 
18 namespace RandomLib {
19  /**
20  * \brief The discrete normal distribution.
21  *
22  * Sample integers \e i with probability proportional to
23  * \f[
24  * \exp\biggl[-\frac12\biggl(\frac{i-\mu}{\sigma}\biggr)^2\biggr],
25  * \f]
26  * where &sigma; and &mu; are given as rationals (the ratio of two integers).
27  * The sampling is exact (provided that the random generator is ideal). For
28  * example
29  * \code
30  #include <iostream>
31  #include <RandomLib/Random.hpp>
32  #include <RandomLib/DiscreteNormal.hpp>
33 
34  int main() {
35  RandomLib::Random r; // Create r
36  r.Reseed(); // and give it a unique seed
37  int sigma_num = 7, sigma_den = 1, mu_num = 1, mu_den = 3;
38  RandomLib::DiscreteNormal<int> d(sigma_num, sigma_den,
39  mu_num, mu_den);
40  for (int i = 0; i < 100; ++i)
41  std::cout << d(r) << "\n";
42  }
43  \endcode
44  * prints out 100 samples with &sigma; = 7 and &mu; = 1/3.
45  *
46  * The algorithm is much the same as for ExactNormal; for details see
47  * - C. F. F. Karney, <i>Sampling exactly from the normal distribution</i>,
48  * http://arxiv.org/abs/1303.6257 (Mar. 2013).
49  * .
50  * That algorithm samples the integer part of the result \e k, samples \e x
51  * in [0,1], and (unless rejected) returns <i>s</i>(\e k + \e x), where \e s
52  * = &plusmn;1. For the discrete case, we sample \e x in [0,1) such that
53  * \f[
54  * s(k + x) = (i - \mu)/\sigma,
55  * \f]
56  * or
57  * \f[
58  * x = s(i - \mu)/\sigma - k
59  * \f]
60  * The value of \e i which results in the smallest \e x &ge; 0 is
61  * \f[
62  * i_0 = s\lceil k \sigma + s \mu\rceil
63  * \f]
64  * so sample
65  * \f[
66  * i = i_0 + sj
67  * \f]
68  * where \e j is uniformly distributed in [0, &lceil;&sigma;&rceil;). The
69  * corresponding value of \e x is
70  * \f[
71  * \begin{aligned}
72  * x &= \bigl(si_0 - (k\sigma + s\mu)\bigr)/\sigma + j/\sigma\\
73  * &= x_0 + j/\sigma,\\
74  * x_0 &= \bigl(\lceil k \sigma + s \mu\rceil -
75  * (k \sigma + s \mu)\bigr)/\sigma.
76  * \end{aligned}
77  * \f]
78  * After \e x is sampled in this way, it should be rejected if \e x &ge; 1
79  * (this is counted with the next larger value of \e k) or if \e x = 0, \e k
80  * = 0, and \e s = &minus;1 (to avoid double counting the origin). If \e x
81  * is accepted (in Step 4 of the ExactNormal algorithm), then return \e i.
82  *
83  * When &sigma; and &mu; are given as rationals, all the arithmetic outlined
84  * above can be carried out exactly. The basic rejection techniques used by
85  * ExactNormal are exact. Thus the result of this discrete form of the
86  * algorithm is also exact.
87  *
88  * RandomLib provides two classes to sample from this distribution:
89  * - DiscreteNormal which is tuned for speed on a typical general purpose
90  * computer. This assumes that random samples can be generated relatively
91  * quickly.
92  * - DiscreteNormalAlt, which is a prototype for what might be needed on a
93  * small device used for cryptography which is using a hardware generator
94  * for obtaining truly random bits. This assumption here is that the
95  * random bits are relatively expensive to obtain.
96  * .
97 
98  * The basic algorithm is the same in the two cases. The main advantages of
99  * this method are:
100  * - exact sampling (provided that the source of random numbers is ideal),
101  * - no need to cut off the tails of the distribution,
102  * - a short program involving simple integer operations only,
103  * - no dependence on external libraries (except to generate random bits),
104  * - no large tables of constants needed,
105  * - minimal time to set up for a new &sigma; and &mu; (roughly comparable to
106  * the time it takes to generate one sample),
107  * - only about 5&ndash;20 times slower than standard routines to sample from
108  * a normal distribution using plain double-precision arithmetic.
109  * - DiscreteNormalAlt exhibits ideal scaling for the consumption of random
110  * bits, namely a constant + log<sub>2</sub>&sigma;, for large &sigma;,
111  * where the constant is about 31.
112  * .
113  * The possible drawbacks of this method are:
114  * - &sigma; and &mu; are restricted to rational numbers with sufficiently
115  * small numerators and denominators to avoid overflow (this is unlikely to
116  * be a severe restriction especially if the template parameter IntType is
117  * set to <code>long long</code>),
118  * - the running time is unbounded (but not in any practical sense),
119  * - the memory consumption is unbounded (but not in any practical sense),
120  * - the toll, about 30 bits, is considerably worse than that obtained using
121  * the Knuth-Yao algorithm, for which the toll is no more than 2 (but this
122  * requires a large table which is expensive to compute and requires a lot
123  * of memory to store).
124  *
125  * This class uses a mutable private vector. So a single DiscreteNormal
126  * object cannot safely be used by multiple threads. In a multi-processing
127  * environment, each thread should use a thread-specific DiscreteNormal
128  * object.
129  *
130  * Some timing results for IntType = int, &mu; = 0, and 10<sup>8</sup>
131  * samples (time = time per sample, including setup time, rv = mean number of
132  * random variables per sample)
133  * - &sigma; = 10, time = 219 ns, rv = 17.52
134  * - &sigma; = 32, time = 223 ns, rv = 17.82
135  * - &sigma; = 1000, time = 225 ns, rv = 17.95
136  * - &sigma; = 160000, time = 226 ns, rv = 17.95
137  *
138  * @tparam IntType the integer type to use (default int).
139  **********************************************************************/
140  template<typename IntType = int> class DiscreteNormal {
141  public:
142  /**
143  * Constructor.
144  *
145  * @param[in] sigma_num the numerator of &sigma;.
146  * @param[in] sigma_den the denominator of &sigma; (default 1).
147  * @param[in] mu_num the numerator of &mu; (default 0).
148  * @param[in] mu_den the denominator of &mu; (default 1).
149  *
150  * The constructor creates a DiscreteNormal objects for sampling with
151  * specific values of &sigma; and &mu;. This may throw an exception if the
152  * parameters are such that overflow is possible. Internally &sigma; and
153  * &mu; are expressed with a common denominator, so it may be possible to
154  * avoid overflow by picking the fractions of these quantities so that \e
155  * sigma_den and \e mu_den have many common factors.
156  **********************************************************************/
157  DiscreteNormal(IntType sigma_num, IntType sigma_den = 1,
158  IntType mu_num = 0, IntType mu_den = 1);
159  /**
160  * Return a sample.
161  *
162  * @tparam Random the type of the random generator.
163  * @param[in,out] r a random generator.
164  * @return discrete normal integer.
165  **********************************************************************/
166  template<class Random>
167  IntType operator()(Random& r) const;
168  private:
169  /**
170  * sigma = _sig / _d, mu = _imu + _mu / _d, _isig = floor(sigma)
171  **********************************************************************/
172  IntType _sig, _mu, _d, _isig, _imu;
173  typedef unsigned short word;
174  /**
175  * Holds as much of intermediate uniform deviates as needed.
176  **********************************************************************/
177  mutable std::vector<word> _v;
178  mutable unsigned _m, _l;
179  /**
180  * Increment on size of _v.
181  **********************************************************************/
182  static const unsigned alloc_incr = 16;
183 
184  // ceil(n/d) for d > 0
185  static IntType iceil(IntType n, IntType d);
186  // abs(n) needed because Visual Studio's std::abs has problems
187  static IntType iabs(IntType n);
188  static IntType gcd(IntType u, IntType v);
189 
190  // After x = LeadingDigit(p), p/_sig = (x + p'/_sig)/b where p and p' are
191  // in [0, _sig) and b = 1 + max(word).
192  word LeadingDigit(IntType& p) const;
193 
194  /**
195  * Implement outcomes for choosing with prob (\e x + 2\e k) / (2\e k + 2);
196  * return:
197  * - 1 (succeed unconditionally) with prob (\e m &minus; 2) / \e m,
198  * - 0 (succeed with probability x) with prob 1 / \e m,
199  * - &minus;1 (fail unconditionally) with prob 1 / \e m.
200  **********************************************************************/
201  template<class Random> static int Choose(Random& r, int m);
202 
203  // Compute v' < v. If true set v = v'.
204  template<class Random> bool less_than(Random& r) const;
205 
206  // Compute v < (x + p/_sig)/base (updating v)
207  template<class Random> bool less_than(Random& r, word x, IntType p) const;
208 
209  // true with prob (x + p/_sig)/base
210  template<class Random> bool bernoulli(Random& r, word x, IntType p) const;
211 
212  /**
213  * Return true with probability exp(&minus;1/2).
214  **********************************************************************/
215  template<class Random> bool ExpProbH(Random& r) const;
216 
217  /**
218  * Return true with probability exp(&minus;<i>n</i>/2).
219  **********************************************************************/
220  template<class Random> bool ExpProb(Random& r, int n) const;
221 
222  /**
223  * Return \e n with probability exp(&minus;<i>n</i>/2)
224  * (1&minus;exp(&minus;1/2)).
225  **********************************************************************/
226  template<class Random> int ExpProbN(Random& r) const;
227 
228  /**
229  * Algorithm B: true with prob exp(-x * (2*k + x) / (2*k + 2)) where
230  * x = (x0 + xn / _sig)/b.
231  **********************************************************************/
232  template<class Random>
233  bool B(Random& r, int k, word x0, IntType xn) const;
234  };
235 
236  template<typename IntType> DiscreteNormal<IntType>::DiscreteNormal
237  (IntType sigma_num, IntType sigma_den,
238  IntType mu_num, IntType mu_den)
239  : _v(std::vector<word>(alloc_incr)), _m(0), _l(alloc_incr) {
240  STATIC_ASSERT(std::numeric_limits<IntType>::is_integer,
241  "DiscreteNormal: invalid integer type IntType");
242  STATIC_ASSERT(std::numeric_limits<IntType>::is_signed,
243  "DiscreteNormal: IntType must be a signed type");
244  STATIC_ASSERT(!std::numeric_limits<word>::is_signed,
245  "DiscreteNormal: word must be an unsigned type");
246  STATIC_ASSERT(std::numeric_limits<IntType>::digits + 1 >=
247  std::numeric_limits<word>::digits,
248  "DiscreteNormal: IntType must be at least as wide as word");
249  if (!( sigma_num > 0 && sigma_den > 0 && mu_den > 0 ))
250  throw RandomErr("DiscreteNormal: need sigma > 0");
251  _imu = mu_num / mu_den;
252  if (_imu == (std::numeric_limits<IntType>::min)())
253  throw RandomErr("DiscreteNormal: abs(mu) too large");
254  mu_num -= _imu * mu_den;
255  IntType l;
256  l = gcd(sigma_num, sigma_den); sigma_num /= l; sigma_den /= l;
257  l = gcd(mu_num, mu_den); mu_num /= l; mu_den /= l;
258  _isig = iceil(sigma_num, sigma_den);
259  l = gcd(sigma_den, mu_den);
260  _sig = sigma_num * (mu_den / l);
261  _mu = mu_num * (sigma_den / l);
262  _d = sigma_den * (mu_den / l);
263  // The rest of the constructor tests for possible overflow
264  // Check for overflow in computing member variables
265  IntType maxint = (std::numeric_limits<IntType>::max)();
266  if (!( mu_den / l <= maxint / sigma_num &&
267  mu_num <= maxint / (sigma_den / l) &&
268  mu_den / l <= maxint / sigma_den ))
269  throw RandomErr("DiscreteNormal: sigma or mu overflow");
270  // The probability that k = kmax is about 10^-543.
271  int kmax = 50;
272  // Check that max plausible result fits in an IntType, i.e.,
273  // _isig * (kmax + 1) + abs(_imu) does not lead to overflow.
274  if (!( kmax + 1 <= maxint / _isig &&
275  _isig * (kmax + 1) <= maxint - iabs(_imu) ))
276  throw RandomErr("DiscreteNormal: possible overflow a");
277  // Check xn0 = _sig * k + s * _mu;
278  if (!( kmax <= maxint / _sig &&
279  _sig * kmax <= maxint - iabs(_mu) ))
280  throw RandomErr("DiscreteNormal: possible overflow b");
281  // Check for overflow in LeadingDigit
282  // p << bits, p = _sig - 1, bits = 8
283  if (!( _sig <= (maxint >> 8) ))
284  throw RandomErr("DiscreteNormal: overflow in LeadingDigit");
285  }
286 
287  template<typename IntType> template<class Random>
289  for (;;) {
290  int k = ExpProbN(r);
291  if (!ExpProb(r, k * (k - 1))) continue;
292  IntType
293  s = r.Boolean() ? -1 : 1,
294  xn = _sig * IntType(k) + s * _mu,
295  i = iceil(xn, _d) + r.template Integer<IntType>(_isig);
296  xn = i * _d - xn;
297  if (xn >= _sig || (k == 0 && s < 0 && xn <= 0)) continue;
298  if (xn > 0) {
299  word x0 = LeadingDigit(xn); // Find first digit in expansion in words
300  int h = k + 1; while (h-- && B(r, k, x0, xn));
301  if (!(h < 0)) continue;
302  }
303  return s * i + _imu;
304  }
305  }
306 
307  template<typename IntType>
308  IntType DiscreteNormal<IntType>::iceil(IntType n, IntType d)
309  { IntType k = n / d; return k + (k * d < n ? 1 : 0); }
310 
311  template<typename IntType> IntType DiscreteNormal<IntType>::iabs(IntType n)
312  { return n < 0 ? -n : n; }
313 
314  template<typename IntType>
315  IntType DiscreteNormal<IntType>::gcd(IntType u, IntType v) {
316  // Knuth, TAOCP, vol 2, 4.5.2, Algorithm A
317  u = iabs(u); v = iabs(v);
318  while (v > 0) { IntType r = u % v; u = v; v = r; }
319  return u;
320  }
321 
322  template<typename IntType> typename DiscreteNormal<IntType>::word
323  DiscreteNormal<IntType>::LeadingDigit(IntType& p) const {
324  static const unsigned bits = 8;
325  static const unsigned num = std::numeric_limits<word>::digits / bits;
326  STATIC_ASSERT(bits * num == std::numeric_limits<word>::digits,
327  "Number of digits in word must be multiple of 8");
328  word s = 0;
329  for (unsigned c = num; c--;) {
330  p <<= bits; s <<= bits;
331  word d = word(p / _sig);
332  s += d;
333  p -= IntType(d) * _sig;
334  }
335  return s;
336  }
337 
338  template<typename IntType> template<class Random>
339  int DiscreteNormal<IntType>::Choose(Random& r, int m) {
340  int k = r.template Integer<int>(m);
341  return k == 0 ? 0 : (k == 1 ? -1 : 1);
342  }
343 
344  template<typename IntType> template<class Random>
345  bool DiscreteNormal<IntType>::less_than(Random& r) const {
346  for (unsigned j = 0; ; ++j) {
347  if (j == _m) {
348  // Need more bits in the old V
349  if (_l == _m) _v.resize(_l += alloc_incr);
350  _v[_m++] = r.template Integer<word>();
351  }
352  word w = r.template Integer<word>();
353  if (w > _v[j])
354  return false; // New V is bigger, so exit
355  else if (w < _v[j]) {
356  _v[j] = w; // New V is smaller, update _v
357  _m = j + 1; // adjusting its size
358  return true; // and generate the next V
359  }
360  // Else w == _v[j] and we need to check the next word
361  }
362  }
363 
364  template<typename IntType> template<class Random>
365  bool DiscreteNormal<IntType>::less_than(Random& r, word x, IntType p) const {
366  if (_m == 0) _v[_m++] = r.template Integer<word>();
367  if (_v[0] != x) return _v[0] < x;
368  for (unsigned j = 1; ; ++j) {
369  if (p == 0) return false;
370  if (j == _m) {
371  if (_l == _m) _v.resize(_l += alloc_incr);
372  _v[_m++] = r.template Integer<word>();
373  }
374  x = LeadingDigit(p);
375  if (_v[j] != x) return _v[j] < x;
376  }
377  }
378 
379  template<typename IntType> template<class Random>
380  bool DiscreteNormal<IntType>::bernoulli(Random& r, word x, IntType p) const {
381  word w = r.template Integer<word>();
382  if (w != x) return w < x;
383  for (;;) {
384  if (p == 0) return false;
385  x = LeadingDigit(p);
386  w = r.template Integer<word>();
387  if (w != x) return w < x;
388  }
389  }
390 
391  template<typename IntType> template<class Random>
392  bool DiscreteNormal<IntType>::ExpProbH(Random& r) const {
393  static const word half = word(1) << (std::numeric_limits<word>::digits - 1);
394  _m = 0;
395  if ((_v[_m++] = r.template Integer<word>()) & half) return true;
396  // Here _v < 1/2. Now loop finding decreasing V. Exit when first
397  // increasing one is found.
398  for (unsigned s = 0; ; s ^= 1) { // Parity of loop count
399  if (!less_than(r)) return s != 0u;
400  }
401  }
402 
403  template<typename IntType> template<class Random>
404  bool DiscreteNormal<IntType>::ExpProb(Random& r, int n) const {
405  while (n-- > 0) { if (!ExpProbH(r)) return false; }
406  return true;
407  }
408 
409  template<typename IntType> template<class Random>
410  int DiscreteNormal<IntType>::ExpProbN(Random& r) const {
411  int n = 0;
412  while (ExpProbH(r)) ++n;
413  return n;
414  }
415 
416  template<typename IntType> template<class Random>
417  bool DiscreteNormal<IntType>::B(Random& r, int k, word x0, IntType xn)
418  const {
419  int n = 0, h = 2 * k + 2, f;
420  _m = 0;
421  for (;; ++n) {
422  if ( ((f = k ? 0 : Choose(r, h)) < 0) ||
423  !(n ? less_than(r) : less_than(r, x0, xn)) ||
424  ((f = k ? Choose(r, h) : f) < 0) ||
425  (f == 0 && !bernoulli(r, x0, xn)) ) break;
426  }
427  return (n % 2) == 0;
428  }
429 
430 } // namespace RandomLib
431 
432 #endif // RANDOMLIB_DISCRETENORMAL_HPP
Generate random integers, reals, and booleans.
DiscreteNormal(IntType sigma_num, IntType sigma_den=1, IntType mu_num=0, IntType mu_den=1)
Exception handling for RandomLib.
Definition: Random.hpp:103
#define STATIC_ASSERT(cond, reason)
Definition: MPFRRandom.hpp:31
RandomCanonical< RandomGenerator > Random
Definition: Random.hpp:135
The discrete normal distribution.
IntType operator()(Random &r) const