RandomSelect.hpp
Go to the documentation of this file.
00001 /**
00002  * \file RandomSelect.hpp
00003  * \brief Header for RandomSelect.
00004  *
00005  * An implementation of the Walker algorithm for selecting from a finite set.
00006  *
00007  * Written by Charles Karney <charles@karney.com> and licensed under the LGPL.
00008  * For more information, see http://randomlib.sourceforge.net/
00009  **********************************************************************/
00010 
00011 #if !defined(RANDOMSELECT_HPP)
00012 #define RANDOMSELECT_HPP "$Id: RandomSelect.hpp 6723 2010-01-11 14:20:10Z ckarney $"
00013 
00014 #include <vector>
00015 #include <limits>
00016 #include <stdexcept>
00017 
00018 #if !defined(STATIC_ASSERT)
00019 /**
00020  * A simple compile-time assert.
00021  **********************************************************************/
00022 #define STATIC_ASSERT(cond,reason) { enum{ STATIC_ASSERT_ENUM = 1/int(cond) }; }
00023 #endif
00024 
00025 namespace RandomLib {
00026   /**
00027    * \brief Random selection from a discrete set.
00028    *
00029    * An implementation of Walker algorithm for selecting from a finite set
00030    * (following Knuth, TAOCP, Vol 2, Sec 3.4.1.A).  This provides a rapid way
00031    * of selecting one of several choices depending on a discrete set weights.
00032    * Original citation is\n A. J. Walker,\n An Efficient Method for Generating
00033    * Discrete Random Variables and General Distributions,\n ACM TOMS 3, 253-256
00034    * (1977).
00035    *
00036    * There are two changes here in the setup algorithm as given by Knuth:
00037    *
00038    * - The probabilities aren't sorted at the beginning of the setup; nor are
00039    * they maintained in a sorted order.  Instead they are just partitioned on
00040    * the mean.  This improves the setup time from O(\e k<sup>2</sup>) to O(\e
00041    * k).
00042    *
00043    * - The internal calculations are carried out with type \a NumericType.  If
00044    * the input weights are of integer type, then choosing an integer type for
00045    * \a NumericType yields an exact solution for the returned distribution
00046    * (assuming that the underlying random generator is exact.)
00047    *
00048    * Example:
00049    * \code
00050    *   #include "RandomLib/RandomSelect.hpp"
00051    *
00052    *   // Weights for throwing a pair of dice
00053    *   unsigned w[] = { 0, 0, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1 };
00054    *
00055    *   // Initialize selection
00056    *   RandomLib::RandomSelect<unsigned> sel(w, w + 13);
00057    *
00058    *   RandomLib::Random r;   // Initialize random numbers
00059    *   std::cout << "Seed set to " << r.SeedString() << std::endl;
00060    *
00061    *   std::cout << "Throw a pair of dice 100 times:";
00062    *   for (unsigned i = 0; i < 100; ++i)
00063    *       std::cout << " " << sel(r);
00064    *   std::cout << std::endl;
00065    * \endcode
00066    **********************************************************************/
00067   template<typename NumericType = double> class RandomSelect {
00068   public:
00069     /**
00070      * Initialize in a cleared state (equivalent to having a single
00071      * choice).
00072      **********************************************************************/
00073     RandomSelect() throw(std::bad_alloc) : _k(0), _wsum(0), _wmax(0) {};
00074 
00075     /**
00076      * Initialize with a weight vector \a w of elements of type \a WeightType.
00077      * Internal calculations are carried out with type \a NumericType.  \a
00078      * NumericType needs to allow Choices() * MaxWeight() to be represented.
00079      * Sensible combinations are:
00080      * - \a WeightType integer, \a NumericType integer with digits(\a
00081      *   NumericType) >= digits(\a WeightType)
00082      * - \a WeightType integer, \a NumericType real
00083      * - \a WeightType real, \a NumericType real with digits(\a NumericType) >=
00084      *   digits(\a WeightType)
00085      **********************************************************************/
00086     template<typename WeightType>
00087     RandomSelect(const std::vector<WeightType>& w)
00088       throw(std::out_of_range, std::bad_alloc) { Init(w.begin(), w.end()); }
00089 
00090     /**
00091      * Initialize with a weight given by a pair of iterators [\a a, \a b).
00092      **********************************************************************/
00093     template<typename InputIterator>
00094     RandomSelect(InputIterator a, InputIterator b)
00095       throw(std::out_of_range, std::bad_alloc);
00096 
00097 
00098     /**
00099      * Clear the state (equivalent to having a single choice).
00100      **********************************************************************/
00101     void Init() throw()
00102     { _k = 0; _wsum = 0; _wmax = 0; _Q.clear(); _Y.clear(); }
00103 
00104     /**
00105      * Re-initialize with a weight vector \a w.  Leave state unaltered in the
00106      * case of an error.
00107      **********************************************************************/
00108     template<typename WeightType>
00109     void Init(const std::vector<WeightType>& w)
00110       throw(std::out_of_range, std::bad_alloc) { Init(w.begin(), w.end()); }
00111 
00112     /**
00113      * Re-initialize with a weight given as a pair of iterators [\a a, \a b).
00114      * Leave state unaltered in the case of an error.
00115      **********************************************************************/
00116     template<typename InputIterator>
00117     void Init(InputIterator a, InputIterator b)
00118       throw(std::out_of_range, std::bad_alloc) {
00119       RandomSelect<NumericType> t(a, b);
00120       _Q.reserve(t._k);
00121       _Y.reserve(t._k);
00122       *this = t;
00123     }
00124 
00125     /**
00126      * Return an index into the weight vector with probability proportional to
00127      * the weight.
00128      **********************************************************************/
00129     template<class Random>
00130     unsigned operator()(Random& r) const throw() {
00131       if (_k <= 1)
00132         return 0;               // Special cases
00133       const unsigned K = r.template Integer<unsigned>(_k);
00134       // redundant casts to type NumericType to prevent warning from MS
00135       // Project
00136       return (std::numeric_limits<NumericType>::is_integer ?
00137               r.template Prob<NumericType>(NumericType(_Q[K]),
00138                                            NumericType(_wsum)) :
00139               r.template Prob<NumericType>(NumericType(_Q[K]))) ?
00140         K : _Y[K];
00141     }
00142 
00143     /**
00144      * Return the sum of the weights.
00145      **********************************************************************/
00146     NumericType TotalWeight() const throw() { return _wsum; }
00147 
00148     /**
00149      * Return the maximum weight.
00150      **********************************************************************/
00151     NumericType MaxWeight() const throw() { return _wmax; }
00152 
00153     /**
00154      * Return the weight for sample \a i.  Weight(i) / TotalWeight() gives the
00155      * probability of sample \a i.
00156      **********************************************************************/
00157     NumericType Weight(unsigned i) const throw() {
00158       if (i >= _k)
00159         return NumericType(0);
00160       else if (_k == 1)
00161         return _wsum;
00162       const NumericType n = std::numeric_limits<NumericType>::is_integer ?
00163         _wsum : NumericType(1);
00164       NumericType p = _Q[i];
00165       for (unsigned j = _k; j;)
00166         if (_Y[--j] == i)
00167           p += n - _Q[j];
00168       // If NumericType is integral, then p % _k == 0.
00169       // assert(!std::numeric_limits<NumericType>::is_integer || p % _k == 0);
00170       return (p / NumericType(_k)) * (_wsum / n);
00171     }
00172 
00173     /**
00174      * Return the number of choices, i.e., the length of the weight vector.
00175      **********************************************************************/
00176     unsigned Choices() const throw() { return _k; }
00177 
00178   private:
00179 
00180     /**
00181      * Size of weight vector
00182      **********************************************************************/
00183     unsigned _k;
00184     /**
00185      * Vector of cutoffs
00186      **********************************************************************/
00187     std::vector<NumericType> _Q;
00188     /**
00189      * Vector of aliases
00190      **********************************************************************/
00191     std::vector<unsigned> _Y;
00192     /**
00193      * The sum of the weights
00194      **********************************************************************/
00195     NumericType _wsum;
00196     /**
00197      * The maximum weight
00198      **********************************************************************/
00199     NumericType _wmax;
00200 
00201   };
00202 
00203   template<typename NumericType> template<typename InputIterator>
00204   RandomSelect<NumericType>::RandomSelect(InputIterator a, InputIterator b)
00205     throw(std::out_of_range, std::bad_alloc) {
00206 
00207     typedef typename std::iterator_traits<InputIterator>::value_type
00208       WeightType;
00209     // Disallow WeightType = real, NumericType = integer
00210     STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer ||
00211                   !std::numeric_limits<NumericType>::is_integer,
00212                   "RandomSelect: inconsisent WeightType and NumericType");
00213 
00214     // If WeightType and NumericType are the same type, NumericType as precise
00215     // as WeightType
00216     STATIC_ASSERT(std::numeric_limits<WeightType>::is_integer !=
00217                   std::numeric_limits<NumericType>::is_integer ||
00218                   std::numeric_limits<NumericType>::digits >=
00219                   std::numeric_limits<WeightType>::digits,
00220                   "RandomSelect: NumericType insufficiently precise");
00221 
00222     _wsum = 0;
00223     _wmax = 0;
00224     std::vector<NumericType> p;
00225 
00226     for (InputIterator wptr = a; wptr != b; ++wptr) {
00227       // Test *wptr < 0 without triggering compiler warning when *wptr =
00228       // unsigned
00229       if (!(*wptr > 0 || *wptr == 0))
00230         // This also catches NaNs
00231         throw std::out_of_range("RandomSelect: Illegal weight");
00232       NumericType w = NumericType(*wptr);
00233       if (w > (std::numeric_limits<NumericType>::max)() - _wsum)
00234         throw std::out_of_range("RandomSelect: Overflow");
00235       _wsum += w;
00236       _wmax = w > _wmax ? w : _wmax;
00237       p.push_back(w);
00238     }
00239 
00240     _k = unsigned(p.size());
00241     if (_wsum <= 0)
00242       throw std::out_of_range("RandomSelect: Zero total weight");
00243 
00244     if (_k <= 1) {
00245       // We treak k <= 1 as a special case in operator()
00246       _Q.clear();
00247       _Y.clear();
00248       return;
00249     }
00250 
00251     if ((std::numeric_limits<NumericType>::max)()/NumericType(_k) <
00252         NumericType(_wmax))
00253       throw std::out_of_range("RandomSelect: Overflow");
00254 
00255     std::vector<unsigned> j(_k);
00256     _Q.resize(_k);
00257     _Y.resize(_k);
00258 
00259     // Pointers to the next empty low and high slots
00260     unsigned u = 0;
00261     unsigned v = _k - 1;
00262 
00263     // Scale input and store in p and setup index array j.  Note _wsum =
00264     // mean(p).  We could scale out _wsum here, but the following is exact when
00265     // w[i] are low integers.
00266     for (unsigned i = 0; i < _k; ++i) {
00267       p[i] *= NumericType(_k);
00268       j[p[i] > _wsum ? v-- : u++] = i;
00269     }
00270 
00271     // Pointers to the next low and high slots to use.  Work towards the
00272     // middle.  This simplifies the loop exit test to u == v.
00273     u = 0;
00274     v = _k - 1;
00275 
00276     // For integer NumericType, store the unnormalized probability in _Q and
00277     // select using the exact Prob(_Q[k], _wsum).  For real NumericType, store
00278     // the normalized probability and select using Prob(_Q[k]).  There will be
00279     // a round off error in performing the division; but there is also the
00280     // potential for round off errors in performing the arithmetic on p.  There
00281     // is therefore no point in simulating the division exactly using the
00282     // slower Prob(real, real).
00283     const NumericType n = std::numeric_limits<NumericType>::is_integer ?
00284       NumericType(1) : _wsum;
00285 
00286     while (true) {
00287       // A loop invariant here is mean(p[j[u..v]]) == _wsum
00288       _Q[j[u]] = p[j[u]] / n;
00289 
00290       // If all arithmetic were exact this assignment could be:
00291       //   if (p[j[u]] < _wsum) _Y[j[u]] = j[v];
00292       // But the following is safer:
00293       _Y[j[u]] = j[p[j[u]] < _wsum ? v : u];
00294 
00295       if (u == v) {
00296         // The following assertion may fail because of roundoff errors
00297         // assert( p[j[u]] == _wsum );
00298         break;
00299       }
00300 
00301       // Update p, u, and v maintaining the loop invariant
00302       p[j[v]] = p[j[v]] - (_wsum - p[j[u]]);
00303       if (p[j[v]] > _wsum)
00304         ++u;
00305       else
00306         j[u] = j[v--];
00307     }
00308     return;
00309   }
00310 
00311 } // namespace RandomLib
00312 
00313 #endif // RANDOMSELECT_HPP