00001 /** 00002 * \file RandomSelect.hpp 00003 * \brief Header for RandomSelect. 00004 * 00005 * An implementation of the Walker algorithm for selecting from a finite set. 00006 * 00007 * Written by Charles Karney <charles@karney.com> and licensed under the LGPL. 00008 * For more information, see http://randomlib.sourceforge.net/ 00009 **********************************************************************/ 00010 00011 #if !defined(RANDOMSELECT_HPP) 00012 #define RANDOMSELECT_HPP "$Id: RandomSelect.hpp 6723 2010-01-11 14:20:10Z ckarney $" 00013 00014 #include <vector> 00015 #include <limits> 00016 #include <stdexcept> 00017 00018 #if !defined(STATIC_ASSERT) 00019 /** 00020 * A simple compile-time assert. 00021 **********************************************************************/ 00022 #define STATIC_ASSERT(cond,reason) { enum{ STATIC_ASSERT_ENUM = 1/int(cond) }; } 00023 #endif 00024 00025 namespace RandomLib { 00026 /** 00027 * \brief Random selection from a discrete set. 00028 * 00029 * An implementation of Walker algorithm for selecting from a finite set 00030 * (following Knuth, TAOCP, Vol 2, Sec 3.4.1.A). This provides a rapid way 00031 * of selecting one of several choices depending on a discrete set weights. 00032 * Original citation is\n A. J. Walker,\n An Efficient Method for Generating 00033 * Discrete Random Variables and General Distributions,\n ACM TOMS 3, 253-256 00034 * (1977). 00035 * 00036 * There are two changes here in the setup algorithm as given by Knuth: 00037 * 00038 * - The probabilities aren't sorted at the beginning of the setup; nor are 00039 * they maintained in a sorted order. Instead they are just partitioned on 00040 * the mean. This improves the setup time from O(\e k<sup>2</sup>) to O(\e 00041 * k). 00042 * 00043 * - The internal calculations are carried out with type \a NumericType. If 00044 * the input weights are of integer type, then choosing an integer type for 00045 * \a NumericType yields an exact solution for the returned distribution 00046 * (assuming that the underlying random generator is exact.) 00047 * 00048 * Example: 00049 * \code 00050 * #include "RandomLib/RandomSelect.hpp" 00051 * 00052 * // Weights for throwing a pair of dice 00053 * unsigned w[] = { 0, 0, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1 }; 00054 * 00055 * // Initialize selection 00056 * RandomLib::RandomSelect<unsigned> sel(w, w + 13); 00057 * 00058 * RandomLib::Random r; // Initialize random numbers 00059 * std::cout << "Seed set to " << r.SeedString() << std::endl; 00060 * 00061 * std::cout << "Throw a pair of dice 100 times:"; 00062 * for (unsigned i = 0; i < 100; ++i) 00063 * std::cout << " " << sel(r); 00064 * std::cout << std::endl; 00065 * \endcode 00066 **********************************************************************/ 00067 template<typename NumericType = double> class RandomSelect { 00068 public: 00069 /** 00070 * Initialize in a cleared state (equivalent to having a single 00071 * choice). 00072 **********************************************************************/ 00073 RandomSelect() throw(std::bad_alloc) : _k(0), _wsum(0), _wmax(0) {}; 00074 00075 /** 00076 * Initialize with a weight vector \a w of elements of type \a WeightType. 00077 * Internal calculations are carried out with type \a NumericType. \a 00078 * NumericType needs to allow Choices() * MaxWeight() to be represented. 00079 * Sensible combinations are: 00080 * - \a WeightType integer, \a NumericType integer with digits(\a 00081 * NumericType) >= digits(\a WeightType) 00082 * - \a WeightType integer, \a NumericType real 00083 * - \a WeightType real, \a NumericType real with digits(\a NumericType) >= 00084 * digits(\a WeightType) 00085 **********************************************************************/ 00086 template<typename WeightType> 00087 RandomSelect(const std::vector<WeightType>& w) 00088 throw(std::out_of_range, std::bad_alloc) { Init(w.begin(), w.end()); } 00089 00090 /** 00091 * Initialize with a weight given by a pair of iterators [\a a, \a b). 00092 **********************************************************************/ 00093 template<typename InputIterator> 00094 RandomSelect(InputIterator a, InputIterator b) 00095 throw(std::out_of_range, std::bad_alloc); 00096 00097 00098 /** 00099 * Clear the state (equivalent to having a single choice). 00100 **********************************************************************/ 00101 void Init() throw() 00102 { _k = 0; _wsum = 0; _wmax = 0; _Q.clear(); _Y.clear(); } 00103 00104 /** 00105 * Re-initialize with a weight vector \a w. Leave state unaltered in the 00106 * case of an error. 00107 **********************************************************************/ 00108 template<typename WeightType> 00109 void Init(const std::vector<WeightType>& w) 00110 throw(std::out_of_range, std::bad_alloc) { Init(w.begin(), w.end()); } 00111 00112 /** 00113 * Re-initialize with a weight given as a pair of iterators [\a a, \a b). 00114 * Leave state unaltered in the case of an error. 00115 **********************************************************************/ 00116 template<typename InputIterator> 00117 void Init(InputIterator a, InputIterator b) 00118 throw(std::out_of_range, std::bad_alloc) { 00119 RandomSelect<NumericType> t(a, b); 00120 _Q.reserve(t._k); 00121 _Y.reserve(t._k); 00122 *this = t; 00123 } 00124 00125 /** 00126 * Return an index into the weight vector with probability proportional to 00127 * the weight. 00128 **********************************************************************/ 00129 template<class Random> 00130 unsigned operator()(Random& r) const throw() { 00131 if (_k <= 1) 00132 return 0; // Special cases 00133 const unsigned K = r.template Integer<unsigned>(_k); 00134 // redundant casts to type NumericType to prevent warning from MS 00135 // Project 00136 return (std::numeric_limits<NumericType>::is_integer ? 00137 r.template Prob<NumericType>(NumericType(_Q[K]), 00138 NumericType(_wsum)) : 00139 r.template Prob<NumericType>(NumericType(_Q[K]))) ? 00140 K : _Y[K]; 00141 } 00142 00143 /** 00144 * Return the sum of the weights. 00145 **********************************************************************/ 00146 NumericType TotalWeight() const throw() { return _wsum; } 00147 00148 /** 00149 * Return the maximum weight. 00150 **********************************************************************/ 00151 NumericType MaxWeight() const throw() { return _wmax; } 00152 00153 /** 00154 * Return the weight for sample \a i. Weight(i) / TotalWeight() gives the 00155 * probability of sample \a i. 00156 **********************************************************************/ 00157 NumericType Weight(unsigned i) const throw() { 00158 if (i >= _k) 00159 return NumericType(0); 00160 else if (_k == 1) 00161 return _wsum; 00162 const NumericType n = std::numeric_limits<NumericType>::is_integer ? 00163 _wsum : NumericType(1); 00164 NumericType p = _Q[i]; 00165 for (unsigned j = _k; j;) 00166 if (_Y[--j] == i) 00167 p += n - _Q[j]; 00168 // If NumericType is integral, then p % _k == 0. 00169 // assert(!std::numeric_limits<NumericType>::is_integer || p % _k == 0); 00170 return (p / NumericType(_k)) * (_wsum / n); 00171 } 00172 00173 /** 00174 * Return the number of choices, i.e., the length of the weight vector. 00175 **********************************************************************/ 00176 unsigned Choices() const throw() { return _k; } 00177 00178 private: 00179 00180 /** 00181 * Size of weight vector 00182 **********************************************************************/ 00183 unsigned _k; 00184 /** 00185 * Vector of cutoffs 00186 **********************************************************************/ 00187 std::vector<NumericType> _Q; 00188 /** 00189 * Vector of aliases 00190 **********************************************************************/ 00191 std::vector<unsigned> _Y; 00192 /** 00193 * The sum of the weights 00194 **********************************************************************/ 00195 NumericType _wsum; 00196 /** 00197 * The maximum weight 00198 **********************************************************************/ 00199 NumericType _wmax; 00200 00201 }; 00202 00203 template<typename NumericType> template<typename InputIterator> 00204 RandomSelect<NumericType>::RandomSelect(InputIterator a, InputIterator b) 00205 throw(std::out_of_range, std::bad_alloc) { 00206 00207 typedef typename std::iterator_traits<InputIterator>::value_type 00208 WeightType; 00209 // Disallow WeightType = real, NumericType = integer 00210 STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer || 00211 !std::numeric_limits<NumericType>::is_integer, 00212 "RandomSelect: inconsisent WeightType and NumericType"); 00213 00214 // If WeightType and NumericType are the same type, NumericType as precise 00215 // as WeightType 00216 STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer != 00217 std::numeric_limits<NumericType>::is_integer || 00218 std::numeric_limits<NumericType>::digits >= 00219 std::numeric_limits<WeightType>::digits, 00220 "RandomSelect: NumericType insufficiently precise"); 00221 00222 _wsum = 0; 00223 _wmax = 0; 00224 std::vector<NumericType> p; 00225 00226 for (InputIterator wptr = a; wptr != b; ++wptr) { 00227 // Test *wptr < 0 without triggering compiler warning when *wptr = 00228 // unsigned 00229 if (!(*wptr > 0 || *wptr == 0)) 00230 // This also catches NaNs 00231 throw std::out_of_range("RandomSelect: Illegal weight"); 00232 NumericType w = NumericType(*wptr); 00233 if (w > (std::numeric_limits<NumericType>::max)() - _wsum) 00234 throw std::out_of_range("RandomSelect: Overflow"); 00235 _wsum += w; 00236 _wmax = w > _wmax ? w : _wmax; 00237 p.push_back(w); 00238 } 00239 00240 _k = unsigned(p.size()); 00241 if (_wsum <= 0) 00242 throw std::out_of_range("RandomSelect: Zero total weight"); 00243 00244 if (_k <= 1) { 00245 // We treak k <= 1 as a special case in operator() 00246 _Q.clear(); 00247 _Y.clear(); 00248 return; 00249 } 00250 00251 if ((std::numeric_limits<NumericType>::max)()/NumericType(_k) < 00252 NumericType(_wmax)) 00253 throw std::out_of_range("RandomSelect: Overflow"); 00254 00255 std::vector<unsigned> j(_k); 00256 _Q.resize(_k); 00257 _Y.resize(_k); 00258 00259 // Pointers to the next empty low and high slots 00260 unsigned u = 0; 00261 unsigned v = _k - 1; 00262 00263 // Scale input and store in p and setup index array j. Note _wsum = 00264 // mean(p). We could scale out _wsum here, but the following is exact when 00265 // w[i] are low integers. 00266 for (unsigned i = 0; i < _k; ++i) { 00267 p[i] *= NumericType(_k); 00268 j[p[i] > _wsum ? v-- : u++] = i; 00269 } 00270 00271 // Pointers to the next low and high slots to use. Work towards the 00272 // middle. This simplifies the loop exit test to u == v. 00273 u = 0; 00274 v = _k - 1; 00275 00276 // For integer NumericType, store the unnormalized probability in _Q and 00277 // select using the exact Prob(_Q[k], _wsum). For real NumericType, store 00278 // the normalized probability and select using Prob(_Q[k]). There will be 00279 // a round off error in performing the division; but there is also the 00280 // potential for round off errors in performing the arithmetic on p. There 00281 // is therefore no point in simulating the division exactly using the 00282 // slower Prob(real, real). 00283 const NumericType n = std::numeric_limits<NumericType>::is_integer ? 00284 NumericType(1) : _wsum; 00285 00286 while (true) { 00287 // A loop invariant here is mean(p[j[u..v]]) == _wsum 00288 _Q[j[u]] = p[j[u]] / n; 00289 00290 // If all arithmetic were exact this assignment could be: 00291 // if (p[j[u]] < _wsum) _Y[j[u]] = j[v]; 00292 // But the following is safer: 00293 _Y[j[u]] = j[p[j[u]] < _wsum ? v : u]; 00294 00295 if (u == v) { 00296 // The following assertion may fail because of roundoff errors 00297 // assert( p[j[u]] == _wsum ); 00298 break; 00299 } 00300 00301 // Update p, u, and v maintaining the loop invariant 00302 p[j[v]] = p[j[v]] - (_wsum - p[j[u]]); 00303 if (p[j[v]] > _wsum) 00304 ++u; 00305 else 00306 j[u] = j[v--]; 00307 } 00308 return; 00309 } 00310 00311 } // namespace RandomLib 00312 00313 #endif // RANDOMSELECT_HPP