RandomLib  1.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Friends Macros Pages
RandomSelect.hpp
Go to the documentation of this file.
1 /**
2  * \file RandomSelect.hpp
3  * \brief Header for RandomSelect.
4  *
5  * An implementation of the Walker algorithm for selecting from a finite set.
6  *
7  * Copyright (c) Charles Karney (2006-2011) <charles@karney.com> and licensed
8  * under the MIT/X11 License. For more information, see
9  * http://randomlib.sourceforge.net/
10  **********************************************************************/
11 
12 #if !defined(RANDOMLIB_RANDOMSELECT_HPP)
13 #define RANDOMLIB_RANDOMSELECT_HPP 1
14 
15 #include <vector>
16 #include <limits>
17 #include <stdexcept>
18 
19 #if defined(_MSC_VER)
20 // Squelch warnings about constant conditional expressions
21 # pragma warning (push)
22 # pragma warning (disable: 4127)
23 #endif
24 
25 namespace RandomLib {
26  /**
27  * \brief Random selection from a discrete set.
28  *
29  * An implementation of Walker algorithm for selecting from a finite set
30  * (following Knuth, TAOCP, Vol 2, Sec 3.4.1.A). This provides a rapid way
31  * of selecting one of several choices depending on a discrete set weights.
32  * Original citation is\n A. J. Walker,\n An Efficient Method for Generating
33  * Discrete Random Variables and General Distributions,\n ACM TOMS 3,
34  * 253--256 (1977).
35  *
36  * There are two changes here in the setup algorithm as given by Knuth:
37  *
38  * - The probabilities aren't sorted at the beginning of the setup; nor are
39  * they maintained in a sorted order. Instead they are just partitioned on
40  * the mean. This improves the setup time from O(\e k<sup>2</sup>) to O(\e
41  * k).
42  *
43  * - The internal calculations are carried out with type \e NumericType. If
44  * the input weights are of integer type, then choosing an integer type for
45  * \e NumericType yields an exact solution for the returned distribution
46  * (assuming that the underlying random generator is exact.)
47  *
48  * Example:
49  * \code
50  #include <RandomLib/RandomSelect.hpp>
51 
52  // Weights for throwing a pair of dice
53  unsigned w[] = { 0, 0, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1 };
54 
55  // Initialize selection
56  RandomLib::RandomSelect<unsigned> sel(w, w + 13);
57 
58  RandomLib::Random r; // Initialize random numbers
59  std::cout << "Seed set to " << r.SeedString() << "\n";
60 
61  std::cout << "Throw a pair of dice 100 times:";
62  for (unsigned i = 0; i < 100; ++i)
63  std::cout << " " << sel(r);
64  std::cout << "\n";
65  \endcode
66  *
67  * @tparam NumericType the numeric type to use (default double).
68  **********************************************************************/
69  template<typename NumericType = double> class RandomSelect {
70  public:
71  /**
72  * Initialize in a cleared state (equivalent to having a single
73  * choice).
74  **********************************************************************/
75  RandomSelect() : _k(0), _wsum(0), _wmax(0) {}
76 
77  /**
78  * Initialize with a weight vector \e w of elements of type \e WeightType.
79  * Internal calculations are carried out with type \e NumericType. \e
80  * NumericType needs to allow Choices() * MaxWeight() to be represented.
81  * Sensible combinations are:
82  * - \e WeightType integer, \e NumericType integer with
83  * digits(\e NumericType) &ge; digits(\e WeightType)
84  * - \e WeightType integer, \e NumericType real
85  * - \e WeightType real, \e NumericType real with digits(\e NumericType)
86  * &ge; digits(\e WeightType)
87  *
88  * @tparam WeightType the type of the weights.
89  * @param[in] w the vector of weights.
90  * @exception RandomErr if any of the weights are negative or if the total
91  * weight is not positive.
92  **********************************************************************/
93  template<typename WeightType>
94  RandomSelect(const std::vector<WeightType>& w) { Init(w.begin(), w.end()); }
95 
96  /**
97  * Initialize with a weight given by a pair of iterators [\e a, \e b).
98  *
99  * @tparam InputIterator the type of the iterator.
100  * @param[in] a the beginning iterator.
101  * @param[in] b the ending iterator.
102  * @exception RandomErr if any of the weights are negative or if the total
103  * weight is not positive.
104  **********************************************************************/
105  template<typename InputIterator>
106  RandomSelect(InputIterator a, InputIterator b);
107 
108  /**
109  * Clear the state (equivalent to having a single choice).
110  **********************************************************************/
111  void Init() throw()
112  { _k = 0; _wsum = 0; _wmax = 0; _Q.clear(); _Y.clear(); }
113 
114  /**
115  * Re-initialize with a weight vector \e w. Leave state unaltered in the
116  * case of an error.
117  *
118  * @tparam WeightType the type of the weights.
119  * @param[in] w the vector of weights.
120  **********************************************************************/
121  template<typename WeightType>
122  void Init(const std::vector<WeightType>& w) { Init(w.begin(), w.end()); }
123 
124  /**
125  * Re-initialize with a weight given as a pair of iterators [\e a, \e b).
126  * Leave state unaltered in the case of an error.
127  *
128  * @tparam InputIterator the type of the iterator.
129  * @param[in] a the beginning iterator.
130  * @param[in] b the ending iterator.
131  **********************************************************************/
132  template<typename InputIterator>
133  void Init(InputIterator a, InputIterator b) {
135  _Q.reserve(t._k);
136  _Y.reserve(t._k);
137  *this = t;
138  }
139 
140  /**
141  * Return an index into the weight vector with probability proportional to
142  * the weight.
143  *
144  * @tparam Random the type of RandomCanonical generator.
145  * @param[in,out] r the RandomCanonical generator.
146  * @return the random index into the weight vector.
147  **********************************************************************/
148  template<class Random>
149  unsigned operator()(Random& r) const throw() {
150  if (_k <= 1)
151  return 0; // Special cases
152  const unsigned K = r.template Integer<unsigned>(_k);
153  // redundant casts to type NumericType to prevent warning from MS Project
154  return (std::numeric_limits<NumericType>::is_integer ?
155  r.template Prob<NumericType>(NumericType(_Q[K]),
156  NumericType(_wsum)) :
157  r.template Prob<NumericType>(NumericType(_Q[K]))) ?
158  K : _Y[K];
159  }
160 
161  /**
162  * @return the sum of the weights.
163  **********************************************************************/
164  NumericType TotalWeight() const throw() { return _wsum; }
165 
166  /**
167  * @return the maximum weight.
168  **********************************************************************/
169  NumericType MaxWeight() const throw() { return _wmax; }
170 
171  /**
172  * @param[in] i the index in to the weight vector.
173  * @return the weight for sample \e i. Weight(i) / TotalWeight() gives the
174  * probability of sample \e i.
175  **********************************************************************/
176  NumericType Weight(unsigned i) const throw() {
177  if (i >= _k)
178  return NumericType(0);
179  else if (_k == 1)
180  return _wsum;
181  const NumericType n = std::numeric_limits<NumericType>::is_integer ?
182  _wsum : NumericType(1);
183  NumericType p = _Q[i];
184  for (unsigned j = _k; j;)
185  if (_Y[--j] == i)
186  p += n - _Q[j];
187  // If NumericType is integral, then p % _k == 0.
188  // assert(!std::numeric_limits<NumericType>::is_integer || p % _k == 0);
189  return (p / NumericType(_k)) * (_wsum / n);
190  }
191 
192  /**
193  * @return the number of choices, i.e., the length of the weight vector.
194  **********************************************************************/
195  unsigned Choices() const throw() { return _k; }
196 
197  private:
198 
199  /**
200  * Size of weight vector
201  **********************************************************************/
202  unsigned _k;
203  /**
204  * Vector of cutoffs
205  **********************************************************************/
206  std::vector<NumericType> _Q;
207  /**
208  * Vector of aliases
209  **********************************************************************/
210  std::vector<unsigned> _Y;
211  /**
212  * The sum of the weights
213  **********************************************************************/
214  NumericType _wsum;
215  /**
216  * The maximum weight
217  **********************************************************************/
218  NumericType _wmax;
219 
220  };
221 
222  template<typename NumericType> template<typename InputIterator>
223  RandomSelect<NumericType>::RandomSelect(InputIterator a, InputIterator b) {
224 
225  typedef typename std::iterator_traits<InputIterator>::value_type
226  WeightType;
227  // Disallow WeightType = real, NumericType = integer
228  STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer ||
229  !std::numeric_limits<NumericType>::is_integer,
230  "RandomSelect: inconsistent WeightType and NumericType");
231 
232  // If WeightType and NumericType are the same type, NumericType as precise
233  // as WeightType
234  STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer !=
235  std::numeric_limits<NumericType>::is_integer ||
236  std::numeric_limits<NumericType>::digits >=
237  std::numeric_limits<WeightType>::digits,
238  "RandomSelect: NumericType insufficiently precise");
239 
240  _wsum = 0;
241  _wmax = 0;
242  std::vector<NumericType> p;
243 
244  for (InputIterator wptr = a; wptr != b; ++wptr) {
245  // Test *wptr < 0 without triggering compiler warning when *wptr =
246  // unsigned
247  if (!(*wptr > 0 || *wptr == 0))
248  // This also catches NaNs
249  throw RandomErr("RandomSelect: Illegal weight");
250  NumericType w = NumericType(*wptr);
251  if (w > (std::numeric_limits<NumericType>::max)() - _wsum)
252  throw RandomErr("RandomSelect: Overflow");
253  _wsum += w;
254  _wmax = w > _wmax ? w : _wmax;
255  p.push_back(w);
256  }
257 
258  _k = unsigned(p.size());
259  if (_wsum <= 0)
260  throw RandomErr("RandomSelect: Zero total weight");
261 
262  if (_k <= 1) {
263  // We treat k <= 1 as a special case in operator()
264  _Q.clear();
265  _Y.clear();
266  return;
267  }
268 
269  if ((std::numeric_limits<NumericType>::max)()/NumericType(_k) <
270  NumericType(_wmax))
271  throw RandomErr("RandomSelect: Overflow");
272 
273  std::vector<unsigned> j(_k);
274  _Q.resize(_k);
275  _Y.resize(_k);
276 
277  // Pointers to the next empty low and high slots
278  unsigned u = 0;
279  unsigned v = _k - 1;
280 
281  // Scale input and store in p and setup index array j. Note _wsum =
282  // mean(p). We could scale out _wsum here, but the following is exact when
283  // w[i] are low integers.
284  for (unsigned i = 0; i < _k; ++i) {
285  p[i] *= NumericType(_k);
286  j[p[i] > _wsum ? v-- : u++] = i;
287  }
288 
289  // Pointers to the next low and high slots to use. Work towards the
290  // middle. This simplifies the loop exit test to u == v.
291  u = 0;
292  v = _k - 1;
293 
294  // For integer NumericType, store the unnormalized probability in _Q and
295  // select using the exact Prob(_Q[k], _wsum). For real NumericType, store
296  // the normalized probability and select using Prob(_Q[k]). There will be
297  // a round off error in performing the division; but there is also the
298  // potential for round off errors in performing the arithmetic on p. There
299  // is therefore no point in simulating the division exactly using the
300  // slower Prob(real, real).
301  const NumericType n = std::numeric_limits<NumericType>::is_integer ?
302  NumericType(1) : _wsum;
303 
304  while (true) {
305  // A loop invariant here is mean(p[j[u..v]]) == _wsum
306  _Q[j[u]] = p[j[u]] / n;
307 
308  // If all arithmetic were exact this assignment could be:
309  // if (p[j[u]] < _wsum) _Y[j[u]] = j[v];
310  // But the following is safer:
311  _Y[j[u]] = j[p[j[u]] < _wsum ? v : u];
312 
313  if (u == v) {
314  // The following assertion may fail because of roundoff errors
315  // assert( p[j[u]] == _wsum );
316  break;
317  }
318 
319  // Update p, u, and v maintaining the loop invariant
320  p[j[v]] = p[j[v]] - (_wsum - p[j[u]]);
321  if (p[j[v]] > _wsum)
322  ++u;
323  else
324  j[u] = j[v--];
325  }
326  return;
327  }
328 
329 } // namespace RandomLib
330 
331 #if defined(_MSC_VER)
332 # pragma warning (pop)
333 #endif
334 
335 #endif // RANDOMLIB_RANDOMSELECT_HPP
void Init(const std::vector< WeightType > &w)
unsigned Choices() const
RandomSelect(const std::vector< WeightType > &w)
Generate random integers, reals, and booleans.
Random selection from a discrete set.
NumericType Weight(unsigned i) const
void Init(InputIterator a, InputIterator b)
Exception handling for RandomLib.
Definition: Random.hpp:103
#define STATIC_ASSERT(cond, reason)
Definition: MPFRRandom.hpp:31
unsigned operator()(Random &r) const
NumericType TotalWeight() const
NumericType MaxWeight() const