Random.cpp
Go to the documentation of this file.
00001 /**
00002  * \file Random.cpp
00003  * \brief Implementation code for RandomLib.
00004  *
00005  * Written by Charles Karney <charles@karney.com> and licensed under the LGPL.
00006  * For more information, see http://randomlib.sourceforge.net/
00007  *
00008  * \brief Code for MixerMT0, MixerMT1, MixerSFMT.
00009  *
00010  * MixerMT0 is adapted from MT19937 (init_by_array) and MT19937_64
00011  * (init_by_array64) by Makoto Matsumoto and Takuji Nishimura.  See
00012  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c and
00013  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
00014  *
00015  * MixerMT1 contains modifications to MixerMT0 by Charles Karney to
00016  * correct defects in MixerMT0.  This is described in W. E. Brown,
00017  * M. Fischler, J. Kowalkowski, M. Paterno, Random Number Generation in C++0X:
00018  * A Comprehensive Proposal, version 3, Sept 2006, Sec. 26.4.7.1,
00019  * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2079.pdf
00020  * This has been replaced in the C++0X proposal by MixerSFMT.
00021  *
00022  * RandonMixer2 is adapted from SFMT19937's init_by_array Mutsuo Saito given in
00023  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz and
00024  * is part of the C++0X proposal; see P. Becker, Working Draft, Standard for
00025  * Programming Language C++, Oct. 2007, Sec. 26.4.7.1,
00026  * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2461.pdf
00027  *
00028  * The adaption to the C++ is by Charles Karney <charles@karney.com> and
00029  * licensed under the LGPL.  For more information, see
00030  * http://randomlib.sourceforge.net/
00031  *
00032  * \brief Code for MT19937<T> and SFMT19937<T>.
00033  *
00034  * MT19937<T> is adapted from MT19937 and MT19937_64 by Makoto Matsumoto and
00035  * Takuji Nishimura.  See
00036  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c and
00037  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
00038  *
00039  * The code for stepping MT19937 backwards is adapted (and simplified) from
00040  * revrand() by Katsumi Hagita.  See
00041  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/FORTRAN/REVmt19937b.f
00042  *
00043  * SFMT19937<T> is adapted from SFMT19937 Mutsuo Saito given in
00044  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/M062821.pdf and
00045  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz
00046  *
00047  * The code for stepping SFMT19937 backwards is by Charles Karney.
00048  *
00049  * The adaption to the C++ is by Charles Karney <charles@karney.com> and
00050  * licensed under the LGPL.  For more information, see
00051  * http://randomlib.sourceforge.net/
00052  **********************************************************************/
00053 
00054 #include "RandomLib/Random.hpp"
00055 
00056 #include <fstream>              // For SeedWord reading /dev/urandom
00057 #include <ctime>                // For SeedWord calling time()
00058 #include <sstream>              // For formatting in Write32/Read32
00059 #include <iomanip>              // For formatting in Write32/Read32
00060 #if !WINDOWS
00061 #include <sys/time.h>           // For SeedWord calling gettimeofday
00062 #include <unistd.h>             // For SeedWord calling getpid(), gethostid()
00063 #else
00064 #include <windows.h>            // For SeedWord calling high prec timer
00065 #include <winbase.h>
00066 #include <process.h>            // For SeedWord calling getpid()
00067 #define getpid _getpid
00068 #define gmtime_r(t,g) gmtime_s(g,t)
00069 #endif
00070 
00071 #if WINDOWS || defined(__CYGWIN__)
00072 #define strtoull strtoul
00073 #endif
00074 
00075 #define RANDOM_CPP "$Id: Random.cpp 6723 2010-01-11 14:20:10Z ckarney $"
00076 
00077 RCSID_DECL(RANDOM_CPP)
00078 RCSID_DECL(RANDOMTYPE_HPP)
00079 RCSID_DECL(RANDOMSEED_HPP)
00080 RCSID_DECL(RANDOMENGINE_HPP)
00081 RCSID_DECL(RANDOMMIXER_HPP)
00082 RCSID_DECL(RANDOMALGORITHM_HPP)
00083 RCSID_DECL(RANDOMPOWER2_HPP)
00084 RCSID_DECL(RANDOMCANONICAL_HPP)
00085 
00086 namespace RandomLib {
00087 
00088   // RandomType implementation
00089 
00090   template<>
00091   void Random_u32::Write32(std::ostream& os, bool bin, int& cnt,
00092                            Random_u32::type x)
00093     throw(std::ios::failure) {
00094     if (bin) {
00095       unsigned char buf[4];
00096       // Use network order -- most significant byte first
00097       buf[3] = (unsigned char)(x);
00098       buf[2] = (unsigned char)(x >>= 8);
00099       buf[1] = (unsigned char)(x >>= 8);
00100       buf[0] = (unsigned char)(x >>= 8);
00101       os.write(reinterpret_cast<const char *>(buf), 4);
00102     } else {
00103       const int longsperline = 72/9;
00104       // Use hexadecimal to minimize storage together with stringstream to
00105       // isolate the effect of changing the base.
00106       std::ostringstream str;
00107       // No spacing before or after
00108       if (cnt > 0)
00109         // Newline every longsperline longs
00110         str << (cnt % longsperline ? ' ' : '\n');
00111       str << std::hex << x;
00112       os << str.str();
00113       ++cnt;
00114     }
00115   }
00116 
00117   template<>
00118   void Random_u32::Read32(std::istream& is, bool bin, Random_u32::type& x)
00119     throw(std::ios::failure) {
00120     if (bin) {
00121       unsigned char buf[4];
00122       is.read(reinterpret_cast<char *>(buf), 4);
00123       // Use network order -- most significant byte first
00124       x = Random_u32::type(buf[0]) << 24 | Random_u32::type(buf[1]) << 16 |
00125         Random_u32::type(buf[2]) << 8 | Random_u32::type(buf[3]);
00126     } else {
00127       std::string s;
00128       is >> std::ws >> s;
00129       // Use hexadecimal to minimize storage together with stringstream to
00130       // isolate the effect of changing the base.
00131       std::istringstream str(s);
00132       str >> std::hex >> x;
00133     }
00134     x &= Random_u32::mask;
00135   }
00136 
00137   template<>
00138   void Random_u64::Write32(std::ostream& os, bool bin, int& cnt,
00139                            Random_u64::type x)
00140     throw(std::ios::failure) {
00141     Random_u32::Write32(os, bin, cnt, Random_u32::cast(x >> 32));
00142     Random_u32::Write32(os, bin, cnt, Random_u32::cast(x      ));
00143   }
00144 
00145   template<>
00146   void Random_u64::Read32(std::istream& is, bool bin, Random_u64::type& x)
00147     throw(std::ios::failure) {
00148     Random_u32::type t;
00149     Random_u32::Read32(is, bin, t);
00150     x = Random_u64::type(t) << 32;
00151     Random_u32::Read32(is, bin, t);
00152     x |= Random_u64::type(t);
00153   }
00154 
00155   // RandomSeed implementation
00156 
00157   RandomSeed::seed_type RandomSeed::SeedWord() {
00158     // Check that the assumptions made about the capabilities of the number
00159     // system are valid.
00160     STATIC_ASSERT(std::numeric_limits<seed_type>::radix == 2 &&
00161                   !std::numeric_limits<seed_type>::is_signed &&
00162                   std::numeric_limits<seed_type>::digits >= 32,
00163                   "seed_type is a bad type");
00164     u32::type t = 0;
00165     // Linux has /dev/urandom to initialize the seed randomly.  (Use
00166     // /dev/urandom instead of /dev/random because it does not block.)
00167     {
00168       std::ifstream f("/dev/urandom", std::ios::binary | std::ios::in);
00169       if (f.good()) {
00170         // Read 32 bits from /dev/urandom
00171         f.read(reinterpret_cast<char *>(&t), sizeof(t));
00172       }
00173     }
00174     std::vector<seed_type> v = SeedVector();
00175     for (size_t i = v.size(); i--;)
00176       u32::CheckSum(v[i], t);
00177     return seed_t::cast(t);
00178   }
00179 
00180   std::vector<RandomSeed::seed_type> RandomSeed::SeedVector() {
00181     std::vector<seed_type> v;
00182     {
00183       // fine-grained timer
00184 #if !WINDOWS
00185       timeval tv;
00186       if (gettimeofday(&tv, 0) == 0)
00187         v.push_back(seed_t::cast(tv.tv_usec));
00188 #else
00189       LARGE_INTEGER taux;
00190       if (QueryPerformanceCounter((LARGE_INTEGER *)&taux)) {
00191         v.push_back(seed_t::cast(taux.LowPart));
00192         v.push_back(seed_t::cast(taux.HighPart));
00193       }
00194 #endif
00195     }
00196     // seconds
00197     const time_t tim = std::time(0);
00198     v.push_back(seed_t::cast(seed_type(tim)));
00199     // PID
00200     v.push_back(seed_t::cast(getpid()));
00201 #if !WINDOWS
00202     // host ID
00203     v.push_back(seed_t::cast(gethostid()));
00204 #endif
00205     {
00206       // year
00207       tm gt;
00208       gmtime_r(&tim, &gt);
00209       v.push_back((seed_type(1900) + seed_t::cast(gt.tm_year)));
00210     }
00211     std::transform(v.begin(), v.end(), v.begin(), seed_t::cast<seed_type>);
00212     return v;
00213   }
00214 
00215   std::vector<RandomSeed::seed_type>
00216   RandomSeed::StringToVector(const std::string& s)
00217     throw(std::bad_alloc) {
00218     std::vector<seed_type> v(0);
00219     const char* c = s.c_str();
00220     char* q;
00221     std::string::size_type p = 0;
00222     while (true) {
00223       p = s.find_first_of("0123456789", p);
00224       if (p == std::string::npos)
00225         break;
00226       v.push_back(seed_t::cast(std::strtoull(c + p, &q, 0)));
00227       p = q - c;
00228     }
00229     return v;
00230   }
00231 
00232   // RandomEngine implementation
00233 
00234   template<class Algorithm, class Mixer>
00235   void RandomEngine<Algorithm, Mixer>::Init() throw() {
00236     // On exit we have _ptr == N.
00237 
00238     STATIC_ASSERT(std::numeric_limits<typename mixer_t::type>::radix == 2 &&
00239                   !std::numeric_limits<typename mixer_t::type>::is_signed &&
00240                   std::numeric_limits<typename mixer_t::type>::digits >=
00241                   int(mixer_t::width),
00242                   "mixer_type is a bad type");
00243 
00244     STATIC_ASSERT(std::numeric_limits<result_type>::radix == 2 &&
00245                   !std::numeric_limits<result_type>::is_signed &&
00246                   std::numeric_limits<result_type>::digits >= width,
00247                   "engine_type is a bad type");
00248 
00249     STATIC_ASSERT(mixer_t::width == 32 || mixer_t::width == 64,
00250                   "Mixer width must be 32 or 64");
00251 
00252     STATIC_ASSERT(width == 32 || width == 64,
00253                   "Algorithm width must be 32 or 64");
00254 
00255     // If the bit-widths are the same then the data sizes must be the same.
00256     STATIC_ASSERT(!(mixer_t::width == width) ||
00257                   sizeof(_stateu) == sizeof(_state),
00258                   "Same bit-widths but different storage");
00259 
00260     // Repacking assumes that narrower data type is at least as wasteful than
00261     // the broader one.
00262     STATIC_ASSERT(!(mixer_t::width < width) ||
00263                   sizeof(_stateu) >= sizeof(_state),
00264                   "Narrow data type uses less storage");
00265 
00266     STATIC_ASSERT(!(mixer_t::width > width) ||
00267                   sizeof(_stateu) <= sizeof(_state),
00268                   "Narrow data type uses less storage");
00269 
00270     // Require that _statev and _state are aligned since no repacking is done
00271     // when calling Transition
00272     STATIC_ASSERT(sizeof(_statev) == sizeof(_state),
00273                   "Storage mismatch with internal engine data type");
00274 
00275     // Convert the seed into state
00276     Mixer::SeedToState(_seed, _stateu, NU);
00277 
00278     // Pack into _state
00279     if (mixer_t::width < width) {
00280       for (size_t i = 0; i < N; ++i)
00281         // Assume 2:1 LSB packing
00282         _state[i] = result_type(_stateu[2*i]) |
00283           result_type(_stateu[2*i + 1]) <<
00284           (mixer_t::width < width ? mixer_t::width : 0);
00285     } else if (mixer_t::width > width) {
00286       for (size_t i = N; i--;)
00287         // Assume 1:2 LSB packing
00288         _state[i] = result_t::cast(_stateu[i>>1] >> width * (i&1u));
00289     } // Otherwise the union takes care of it
00290 
00291     Algorithm::NormalizeState(_state);
00292 
00293     _rounds = -1;
00294     _ptr = N;
00295   }
00296 
00297   template<class Algorithm, class Mixer> Random_u32::type
00298   RandomEngine<Algorithm, Mixer>::Check(u64::type v, u32::type e,
00299                                          u32::type m) const
00300     throw(std::out_of_range) {
00301     if (v != version)
00302       throw std::out_of_range(Name() + ": Unknown version");
00303     if (e != Algorithm::version)
00304       throw std::out_of_range(Name() + ": Algorithm mismatch");
00305     if (m != Mixer::version)
00306       throw std::out_of_range(Name() + ": Mixer mismatch");
00307     u32::type check = 0;
00308     u64::CheckSum(v, check);
00309     u32::CheckSum(e, check);
00310     u32::CheckSum(m, check);
00311     u32::CheckSum(u32::type(_seed.size()), check);
00312     for (std::vector<seed_type>::const_iterator n = _seed.begin();
00313          n != _seed.end(); ++n) {
00314       if (*n != seed_t::cast(*n))
00315         throw std::out_of_range(Name() + ": Illegal seed value");
00316       u32::CheckSum(u32::type(*n), check);
00317     }
00318     u32::CheckSum(_ptr, check);
00319     if (_stride == 0 || _stride > UNINIT/2)
00320       throw std::out_of_range(Name() + ": Invalid stride");
00321     u32::CheckSum(_stride, check);
00322     if (_ptr != UNINIT) {
00323       if (_ptr >= N + _stride)
00324         throw std::out_of_range(Name() + ": Invalid pointer");
00325       u64::CheckSum(_rounds, check);
00326       Algorithm::CheckState(_state, check);
00327     }
00328     return check;
00329   }
00330 
00331   template<typename Algorithm, typename Mixer>
00332   RandomEngine<Algorithm, Mixer>::RandomEngine(std::istream& is, bool bin)
00333     throw(std::ios::failure, std::out_of_range, std::bad_alloc) {
00334     u64::type versionr;
00335     u32::type versione, versionm, t;
00336     u64::Read32(is, bin, versionr);
00337     u32::Read32(is, bin, versione);
00338     u32::Read32(is, bin, versionm);
00339     u32::Read32(is, bin, t);
00340     _seed.resize(size_t(t));
00341     for (std::vector<seed_type>::iterator n = _seed.begin();
00342          n != _seed.end(); ++n) {
00343       u32::Read32(is, bin, t);
00344       *n = seed_type(t);
00345     }
00346     u32::Read32(is, bin, t);
00347     // Don't need to worry about sign extension because _ptr is unsigned.
00348     _ptr = unsigned(t);
00349     u32::Read32(is, bin, t);
00350     _stride = unsigned(t);
00351     if (_ptr != UNINIT) {
00352       u64::type p;
00353       u64::Read32(is, bin, p);
00354       _rounds = (long long)(p);
00355       // Sign extension in case long long is bigger than 64 bits.
00356       _rounds <<= 63 - std::numeric_limits<long long>::digits;
00357       _rounds >>= 63 - std::numeric_limits<long long>::digits;
00358       for (unsigned i = 0; i < N; ++i)
00359         result_t::Read32(is, bin, _state[i]);
00360     }
00361     u32::Read32(is, bin, t);
00362     if (t != Check(versionr, versione, versionm))
00363       throw std::out_of_range(Name() + ": Checksum failure");
00364   }
00365 
00366   template<typename Algorithm, typename Mixer>
00367   void RandomEngine<Algorithm, Mixer>::Save(std::ostream& os,
00368                                             bool bin) const
00369     throw(std::ios::failure) {
00370     u32::type check = Check(version, Algorithm::version, Mixer::version);
00371     int c = 0;
00372     u64::Write32(os, bin, c, version);
00373     u32::Write32(os, bin, c, Algorithm::version);
00374     u32::Write32(os, bin, c, Mixer::version);
00375     u32::Write32(os, bin, c, u32::type(_seed.size()));
00376     for (std::vector<seed_type>::const_iterator n = _seed.begin();
00377          n != _seed.end(); ++n)
00378       u32::Write32(os, bin, c, u32::type(*n));
00379     u32::Write32(os, bin, c, _ptr);
00380     u32::Write32(os, bin, c, _stride);
00381     if (_ptr != UNINIT) {
00382       u64::Write32(os, bin, c, u64::type(_rounds));
00383       for (unsigned i = 0; i < N; ++i)
00384         result_t::Write32(os, bin, c, _state[i]);
00385     }
00386     u32::Write32(os, bin, c, check);
00387   }
00388 
00389   template<typename Algorithm, typename Mixer>
00390   void RandomEngine<Algorithm, Mixer>::StepCount(long long n) throw() {
00391     // On exit we have 0 <= _ptr <= N.
00392     if (_ptr == UNINIT)
00393       Init();
00394     const long long ncount = n + Count(); // new Count()
00395     long long nrounds = ncount / N;
00396     int nptr = int(ncount - nrounds * N);
00397     // We pick _ptr = N or _ptr = 0 depending on which choice involves the
00398     // least work.  We thus avoid doing one (potentially unneeded) call to
00399     // Transition.
00400     if (nptr < 0) {
00401       --nrounds;
00402       nptr += N;
00403     } else if (nptr == 0 && nrounds > _rounds) {
00404       nptr = N;
00405       --nrounds;
00406     }
00407     if (nrounds != _rounds)
00408       Algorithm::Transition(nrounds - _rounds, _statev);
00409     _rounds = nrounds;
00410     _ptr = nptr;
00411   }
00412 
00413   template<typename Algorithm, typename Mixer>
00414   void RandomEngine<Algorithm, Mixer>::SelfTest() {
00415     RandomEngine g(std::vector<seed_type>(0));
00416     g.SetCount(10000-1);
00417     result_type x = g();
00418     if (SelfTestResult(0) && x != SelfTestResult(1))
00419       throw std::out_of_range(Name() + ": Incorrect result with seed " +
00420                               g.SeedString());
00421     seed_type s[] = {0x1234U, 0x5678U, 0x9abcU, 0xdef0U};
00422     //    seed_type s[] = {1, 2, 3, 4};
00423     g.Reseed(s, s+4);
00424     g.StepCount(-20000);
00425     std::string save;
00426     {
00427       std::ostringstream stream;
00428       stream << g << std::endl;
00429       save = stream.str();
00430     }
00431     g.Reset();
00432     {
00433       std::istringstream stream(save);
00434       stream >> g;
00435     }
00436     g.SetCount(10000);
00437     {
00438       std::ostringstream stream;
00439       g.Save(stream, true);
00440       save = stream.str();
00441     }
00442     {
00443       std::istringstream stream(save);
00444       RandomEngine h(std::vector<seed_type>(0));
00445       h.Load(stream, true);
00446       h.SetCount(1000000-1);
00447       x = h();
00448       if (SelfTestResult(0) && x != SelfTestResult(2))
00449         throw std::out_of_range(Name() + ": Incorrect result with seed " +
00450                                 h.SeedString());
00451       g.SetCount(1000000);
00452       if (h != g)
00453         throw std::out_of_range(Name() + ": Comparison failure");
00454     }
00455   }
00456 
00457   template<> Random_u32::type
00458   RandomEngine<MT19937<Random_u32>, MixerMT0<Random_u32> >::
00459   SelfTestResult(unsigned i)
00460     throw() {
00461     return i == 0 ? 1 :
00462       i == 1 ? 4123659995UL : 3016432305UL;
00463   }
00464 
00465   template<> Random_u64::type
00466   RandomEngine<MT19937<Random_u64>, MixerMT0<Random_u64> >::
00467   SelfTestResult(unsigned i)
00468     throw() {
00469     return i == 0 ? 1 :
00470       i == 1 ? 9981545732273789042ULL : 1384037754719008581ULL;
00471   }
00472 
00473   template<> Random_u32::type
00474   RandomEngine<MT19937<Random_u32>, MixerMT1<Random_u32> >::
00475   SelfTestResult(unsigned i)
00476     throw() {
00477     return i == 0 ? 1 :
00478       i == 1 ? 4123659995UL : 2924523180UL;
00479   }
00480 
00481   template<> Random_u64::type
00482   RandomEngine<MT19937<Random_u64>, MixerMT1<Random_u64> >::
00483   SelfTestResult(unsigned i)
00484     throw() {
00485     return i == 0 ? 1 :
00486       i == 1 ? 9981545732273789042ULL : 5481486777409645478ULL;
00487   }
00488 
00489   template<> Random_u32::type
00490   RandomEngine<MT19937<Random_u32>, MixerSFMT>::
00491   SelfTestResult(unsigned i)
00492     throw() {
00493     return i == 0 ? 1 :
00494       i == 1 ? 666528879UL : 2183745132UL;
00495   }
00496 
00497   template<> Random_u64::type
00498   RandomEngine<MT19937<Random_u64>, MixerSFMT>::
00499   SelfTestResult(unsigned i)
00500     throw() {
00501     return i == 0 ? 1 :
00502       i == 1 ? 12176471137395770412ULL : 66914054428611861ULL;
00503   }
00504 
00505   template<> Random_u32::type
00506   RandomEngine<SFMT19937<Random_u32>, MixerSFMT>::
00507   SelfTestResult(unsigned i)
00508     throw() {
00509     return i == 0 ? 1 :
00510       i == 1 ? 2695024307UL : 782200760UL;
00511   }
00512 
00513   template<> Random_u64::type
00514   RandomEngine<SFMT19937<Random_u64>, MixerSFMT>::
00515   SelfTestResult(unsigned i)
00516     throw() {
00517     return i == 0 ? 1 :
00518       i == 1 ? 1464461649847485149ULL : 5050640804923595109ULL;
00519   }
00520 
00521   // RandomMixer implementation
00522 
00523   template<class RandomType> void MixerMT0<RandomType>::
00524   SeedToState(const std::vector<RandomSeed::seed_type>& seed,
00525               mixer_type state[], unsigned n) throw() {
00526     // Adapted from
00527     // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
00528     // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
00529     const unsigned s = unsigned(seed.size());
00530     const unsigned w = mixer_t::width;
00531 
00532     mixer_type r = s ? a1 : a0;
00533     state[0] = r;
00534     for (unsigned k = 1; k < n; ++k) {
00535       r = b * (r ^ r >> w - 2) + k;
00536       r &= mask;
00537       state[k] = r;
00538     }
00539     if (s > 0) {
00540       const unsigned m = mixer_t::width / 32,
00541         s2 = (s + m - 1)/m;
00542       unsigned i1 = 1;
00543       r = state[0];
00544       for (unsigned k = (n > s2 ? n : s2), j = 0;
00545            k; --k, i1 = i1 == n - 1 ? 1 : i1 + 1, // i1 = i1 + 1 mod n - 1
00546              j = j == s2 - 1 ? 0 : j + 1 ) { // j = j+1 mod s2
00547         r = state[i1] ^ c * (r ^ r >> w - 2);
00548         r += j + mixer_type(seed[m * j]) +
00549           (m == 1 || 2 * j + 1 == s ? mixer_type(0) :
00550            mixer_type(seed[m * j + 1]) << w - 32);
00551         r &= mask;
00552         state[i1] = r;
00553       }
00554       for (unsigned k = n - 1; k; --k,
00555              i1 = i1 == n - 1 ? 1 : i1 + 1) { // i1 = i1 + 1 mod n - 1
00556         r = state[i1] ^ d * (r ^ r >> w - 2);
00557         r -= i1;
00558         r &= mask;
00559         state[i1] = r;
00560       }
00561       state[0] = typename mixer_t::type(1) << w - 1;
00562     }
00563   }
00564 
00565   template<class RandomType> void MixerMT1<RandomType>::
00566   SeedToState(const std::vector<RandomSeed::seed_type>& seed,
00567               mixer_type state[], unsigned n) throw() {
00568     // This is the algorithm given in the seed_seq class described in
00569     // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2079.pdf It is
00570     // a modification of
00571     // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
00572     // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
00573     const unsigned s = unsigned(seed.size());
00574     const unsigned w = mixer_t::width;
00575 
00576     mixer_type r = (a + s) & mask;
00577     state[0] = r;
00578     for (unsigned k = 1; k < n; ++k) {
00579       r = b * (r ^ r >> w - 2) + k;
00580       r &= mask;
00581       state[k] = r;
00582     }
00583     if (s > 0) {
00584       const unsigned m = mixer_t::width / 32,
00585         s2 = (s + m - 1)/m;
00586       unsigned i1 = 0;
00587       for (unsigned k = (n > s2 ? n : s2), j = 0;
00588            k; --k, i1 = i1 == n - 1 ? 0 : i1 + 1, // i1 = i1 + 1 mod n
00589              j = j == s2 - 1 ? 0 : j + 1 ) { // j = j+1 mod s2
00590         r = state[i1] ^ c * (r ^ r >> w - 2);
00591         r += j + mixer_type(seed[m * j]) +
00592           (m == 1 || 2 * j + 1 == s ? mixer_type(0) :
00593            mixer_type(seed[m * j + 1]) << w - 32);
00594         r &= mask;
00595         state[i1] = r;
00596       }
00597       for (unsigned k = n; k; --k,
00598              i1 = i1 == n - 1 ? 0 : i1 + 1) { // i1 = i1 + 1 mod n
00599         r = state[i1] ^ d * (r ^ r >> w - 2);
00600         r -= i1;
00601         r &= mask;
00602         state[i1] = r;
00603       }
00604     }
00605   }
00606 
00607   void MixerSFMT::SeedToState(const std::vector<RandomSeed::seed_type>& seed,
00608                                  mixer_type state[], unsigned n)
00609     throw() {
00610     // This is adapted from the routine init_by_array by Mutsuo Saito given in
00611     // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz
00612 
00613     if (n == 0)
00614       return;                   // Nothing to do
00615 
00616     const unsigned s = unsigned(seed.size()),
00617       // Add treatment of small n with lag = (n - 1)/2 for n <= 7.  In
00618       // particular, the first operation (xor or plus) in each for loop
00619       // involves three distinct indices for n > 2.
00620       lag = n >= 623 ? 11 : (n >= 68 ? 7 : (n >= 39 ? 5 :
00621                                             (n >= 7 ? 3 : (n - 1)/2))),
00622       // count = max( s + 1, n )
00623       count = s + 1 > n ? s + 1 : n;
00624 
00625     std::fill(state, state + n, mixer_type(a));
00626     const unsigned w = mixer_t::width;
00627 
00628     unsigned i = 0, k = (n - lag) / 2, l = k + lag;
00629     mixer_type r = state[n - 1];
00630     for (unsigned j = 0; j < count; ++j,
00631            i = i == n - 1 ? 0 : i + 1,
00632            k = k == n - 1 ? 0 : k + 1,
00633            l = l == n - 1 ? 0 : l + 1) {
00634       // Here r = state[(j - 1) mod n]
00635       //      i = j mod n
00636       //      k = (j + (n - lag)/2) mod n
00637       //      l = (j + (n - lag)/2 + lag) mod n
00638       r ^= state[i] ^ state[k];
00639       r &= mask;
00640       r = b * (r ^ r >> (w - 5));
00641       state[k] += r;
00642       r += i + (j > s ? 0 : (j ? mixer_type(seed[j - 1]) : s));
00643       state[l] += r;
00644       state[i] = r;
00645     }
00646 
00647     for (unsigned j = n; j; --j,
00648            i = i == n - 1 ? 0 : i + 1,
00649            k = k == n - 1 ? 0 : k + 1,
00650            l = l == n - 1 ? 0 : l + 1) {
00651       // Here r = state[(i - 1) mod n]
00652       //      k = (i + (n - lag)/2) mod n
00653       //      l = (i + (n - lag)/2 + lag) mod n
00654       r += state[i] + state[k];
00655       r &= mask;
00656       r = c * (r ^ r >> (w - 5));
00657       r &= mask;
00658       state[k] ^= r;
00659       r -= i;
00660       r &= mask;
00661       state[l] ^= r;
00662       state[i] = r;
00663     }
00664   }
00665 
00666   // RandomAlgorithm implementation
00667 
00668   // Here, input is I, J = I + 1, K = I + M; output is I = I + N (mod N)
00669 
00670 #define MT19937_STEP(I, J, K) statev[I] = statev[K] ^           \
00671     (statev[J] & engine_type(1) ? magic : engine_type(0)) ^     \
00672     ((statev[I] & upper) | (statev[J] & lower)) >> 1
00673 
00674   // The code is cleaned up a little from Hagita's Fortran version by getting
00675   // rid of the unnecessary masking by YMASK and by using simpler logic to
00676   // restore the correct value of _state[0].
00677   //
00678   // Here input is J = I + N - 1, K = I + M - 1, and p = y[I] (only the high
00679   // bits are used); output _state[I] and p = y[I - 1].
00680 
00681 #define MT19937_REVSTEP(I, J, K) {                                      \
00682     engine_type q = statev[J] ^ statev[K], s = q >> (width - 1);        \
00683     q = (q ^ (s ? magic : engine_type(0))) << 1 | s;                    \
00684     statev[I] = (p & upper) | (q & lower);                              \
00685     p = q;                                                              \
00686   }
00687 
00688   template<class RandomType>
00689   void MT19937<RandomType>::Transition(long long count, internal_type statev[])
00690     throw() {
00691     if (count > 0)
00692       for (; count; --count) {
00693         // This ONLY uses high bit of statev[0]
00694         unsigned i = 0;
00695         for (; i < N - M; ++i) MT19937_STEP(i, i + 1, i + M    );
00696         for (; i < N - 1; ++i) MT19937_STEP(i, i + 1, i + M - N);
00697         MT19937_STEP(N - 1, 0, M - 1); // i = N - 1
00698       }
00699     else if (count < 0)
00700       for (; count; ++count) {
00701         // This ONLY uses high bit of statev[0]
00702         engine_type p = statev[0];
00703         // Fix low bits of statev[0] and compute y[-1]
00704         MT19937_REVSTEP(0, N - 1, M - 1); // i = N
00705         unsigned i = N - 1;
00706         for (; i > N - M; --i) MT19937_REVSTEP(i, i - 1, i + M - 1 - N);
00707         for (; i        ; --i) MT19937_REVSTEP(i, i - 1, i + M - 1    );
00708         MT19937_REVSTEP(0, N - 1, M - 1); // i = 0
00709       }
00710   }
00711 
00712 #undef MT19937_STEP
00713 #undef MT19937_REVSTEP
00714 
00715   template<class RandomType>
00716   void MT19937<RandomType>::NormalizeState(engine_type state[]) throw () {
00717 
00718     // Perform the MT-specific sanity check on the resulting state ensuring
00719     // that the significant 19937 bits are not all zero.
00720     state[0] &= upper;  // Mask out unused bits
00721     unsigned i = 0;
00722     while (i < N && state[i] == 0)
00723       ++i;
00724     if (i >= N)
00725       state[0] = engine_type(1) << (width - 1); // with prob 2^-19937
00726 
00727     // This sets the low R bits of _state[0] consistent with the rest of the
00728     // state.  Needed to handle SetCount(-N); Ran32(); immediately following
00729     // reseeding.  This wasn't required in the original code because a
00730     // Transition was always done first.
00731     engine_type q = state[N - 1] ^ state[M - 1], s = q >> (width - 1);
00732     q = (q ^ (s ? magic : engine_type(0))) << 1 | s;
00733     state[0] = (state[0] & upper) | (q & lower);
00734   }
00735 
00736   template<class RandomType>
00737   void MT19937<RandomType>::CheckState(const engine_type state[],
00738                                        Random_u32::type& check)
00739     throw(std::out_of_range) {
00740     engine_type x = 0;
00741     Random_u32::type c = check;
00742     for (unsigned i = 0; i < N; ++i) {
00743       engine_t::CheckSum(state[i], c);
00744       x |= state[i];
00745     }
00746     if (x == 0)
00747       throw std::out_of_range("MT19937: All-zero state");
00748 
00749     // There are only width*(N-1) + 1 = 19937 independent bits of state.  Thus
00750     // the low width-1 bits of _state[0] are derivable from the other bits in
00751     // state.  Verify that the redundant bits bits are consistent.
00752     engine_type q = state[N - 1] ^ state[M - 1], s = q >> (width - 1);
00753     q = (q ^ (s ? magic : engine_type(0))) << 1 | s;
00754     if ((q ^ state[0]) & lower)
00755       throw std::out_of_range("MT19937: Invalid state");
00756 
00757     check = c;
00758   }
00759 
00760 #if HAVE_SSE2
00761 
00762   // Transition is from Saito's Master's Thesis
00763   // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/M062821.pdf
00764   //
00765   // This implements
00766   //
00767   //     w_{i+N} = w_i A xor w_M B xor w_{i+N-2} C xor w_{i+N-1} D
00768   //
00769   // where w_i is a 128-bit word and
00770   //
00771   //     w A = (w << 8)_128 xor w
00772   //     w B = (w >> 11)_32 & MSK
00773   //     w C = (w >> 8)_128
00774   //     w D = (w << 18)_32
00775   //
00776   // Here the _128 means shift is of whole 128-bit word.  _32 means the shifts
00777   // are independently done on each 32-bit word.
00778   //
00779   // In SFMT19937_STEP32 and SFMT19937_STEP64 input is I, J = I + M and output
00780   // is I = I + N (mod N).  On input, s and r give state for I + N - 2 and I +
00781   // N - 1; on output s and r give state for I + N - 1 and I + N.  The
00782   // implementation of 128-bit operations is open-coded in a portable fashion
00783   // (with LSB ordering).
00784   //
00785   // N.B. Here N and M are the lags in units of BitWidth words and so are 4
00786   // (for u32 implementation) or 2 (for u64 implementation) times bigger than
00787   // defined in Saito's thesis.
00788 
00789   // This is adapted from SFMT-sse.c in the SFMT 1.2 distribution.
00790   // The order of instructions has been rearranged to increase the
00791   // speed slightly
00792 
00793 #define SFMT19937_STEP128(I, J) {                       \
00794     internal_type x = _mm_load_si128(statev + I),       \
00795       y = _mm_srli_epi32(statev[J], 11),                \
00796       z = _mm_srli_si128(s, 1);                         \
00797     s = _mm_slli_epi32(r, 18);                          \
00798     z = _mm_xor_si128(z, x);                            \
00799     x = _mm_slli_si128(x, 1);                           \
00800     z = _mm_xor_si128(z, s);                            \
00801     y = _mm_and_si128(y, m);                            \
00802     z = _mm_xor_si128(z, x);                            \
00803     s = r;                                              \
00804     r = _mm_xor_si128(z, y);                            \
00805     _mm_store_si128(statev + I, r);                     \
00806   }
00807 
00808   // This undoes SFMT19937_STEP.  Trivially, we have
00809   //
00810   //     w_i A = w_{i+N} xor w_{i+M} B xor w_{i+N-2} C xor w_{i+N-1} D
00811   //
00812   // Given w_i A we can determine w_i from the observation that A^16 =
00813   // identity, thus
00814   //
00815   //     w_i = (w_i A) A^15
00816   //
00817   // Because x A^(2^n) = x << (8*2^n) xor x, the operation y = x A^15 can be
00818   // implemented as
00819   //
00820   //     y'   = (x    << 64)_128 xor x    = x    A^8
00821   //     y''  = (y'   << 32)_128 xor y'   = y'   A^4 = x A^12
00822   //     y''' = (y''  << 16)_128 xor y''  = y''  A^2 = x A^14
00823   //     y    = (y''' <<  8)_128 xor y''' = y''' A   = x A^15
00824   //
00825   // Here input is I = I + N, J = I + M, K = I + N - 2, L = I + N -1, and
00826   // output is I = I.
00827   //
00828   // This is about 15-35% times slower than SFMT19937_STEPNN, because (1) there
00829   // doesn't appear to be a straightforward way of saving intermediate results
00830   // across calls as with SFMT19937_STEPNN and (2) w A^15 is slower to compute
00831   // than w A.
00832 
00833 #define SFMT19937_REVSTEP128(I, J, K, L) {              \
00834     internal_type x = _mm_load_si128(statev + I),       \
00835       y = _mm_srli_epi32(statev[J], 11),                \
00836       z = _mm_slli_epi32(statev[L], 18);                \
00837     y = _mm_and_si128(y, m);                            \
00838     x = _mm_xor_si128(x, _mm_srli_si128(statev[K], 1)); \
00839     x = _mm_xor_si128(x, z);                            \
00840     x = _mm_xor_si128(x, y);                            \
00841     x = _mm_xor_si128(_mm_slli_si128(x, 8), x);         \
00842     x = _mm_xor_si128(_mm_slli_si128(x, 4), x);         \
00843     x = _mm_xor_si128(_mm_slli_si128(x, 2), x);         \
00844     x = _mm_xor_si128(_mm_slli_si128(x, 1), x);         \
00845     _mm_store_si128(statev + I, x);                     \
00846   }
00847 
00848   template<class RandomType>
00849   void SFMT19937<RandomType>::Transition(long long count,
00850                                          internal_type statev[])
00851     throw() {
00852     const internal_type m = _mm_set_epi32(magic3, magic2, magic1, magic0);
00853     if (count > 0) {
00854       internal_type s = _mm_load_si128(statev + N128 - 2),
00855         r = _mm_load_si128(statev + N128 - 1);
00856       for (; count; --count) {
00857         unsigned i = 0;
00858         for (; i + M128 < N128; ++i) SFMT19937_STEP128(i, i + M128       );
00859         for (; i < N128       ; ++i) SFMT19937_STEP128(i, i + M128 - N128);
00860       }
00861     } else if (count < 0)
00862       for (; count; ++count) {
00863         unsigned i = N128;
00864         for (; i + M128 > N128;) {
00865           --i; SFMT19937_REVSTEP128(i, i + M128 - N128, i - 2, i - 1);
00866         }
00867         for (; i > 2;) {
00868           --i; SFMT19937_REVSTEP128(i, i + M128, i - 2, i - 1);
00869         }
00870         SFMT19937_REVSTEP128(1, M128 + 1, N128 - 1, 0       ); // i = 1
00871         SFMT19937_REVSTEP128(0, M128    , N128 - 2, N128 - 1); // i = 0
00872       }
00873   }
00874 
00875 #undef SFMT19937_STEP128
00876 #undef SFMT19937_REVSTEP128
00877 
00878 #elif HAVE_ALTIVEC
00879 
00880   // The Altivec versions of SFMT19937_{,REV}STEP128 are simply translated from
00881   // the SSE2 versions.  The only significant differences arise because of the
00882   // MSB ordering of the PowerPC.  This means that teh 32-bit and 64-bit
00883   // versions are no different because 32-bit and 64-bit words don't pack
00884   // together in the same way as on an SSE2 machine (see the two definitions of
00885   // magic).  This also means that the 128-bit byte shifts on an LSB machine
00886   // change into more complicated byte permutations.
00887 
00888 #define ALTIVEC_PERM(X, P) vec_perm(X, P, P)
00889 
00890 #define SFMT19937_STEP128(I, J) {                               \
00891     internal_type x = statev[I],                                \
00892       z = vec_xor(vec_xor(ALTIVEC_PERM(s, right1), x),          \
00893                   vec_sl(r, bitleft));                          \
00894     s = r;                                                      \
00895     r = vec_xor(z,                                              \
00896                 vec_xor(ALTIVEC_PERM(x, left1),                 \
00897                         vec_and(vec_sr(statev[J], bitright),    \
00898                                 magic)));                       \
00899     statev[I] = r;                                              \
00900   }
00901 
00902 #define SFMT19937_REVSTEP128(I, J, K, L) {              \
00903     internal_type x = statev[I],                        \
00904       y = vec_sr(statev[J], bitright),                  \
00905       z = vec_sl(statev[L], bitleft);                   \
00906     y = vec_and(y, magic);                              \
00907     x = vec_xor(x, ALTIVEC_PERM(statev[K], right1));    \
00908     x = vec_xor(x, z);                                  \
00909     x = vec_xor(x, y);                                  \
00910     x = vec_xor(ALTIVEC_PERM(x, left8), x);             \
00911     x = vec_xor(ALTIVEC_PERM(x, left4), x);             \
00912     x = vec_xor(ALTIVEC_PERM(x, left2), x);             \
00913     statev[I] = vec_xor(ALTIVEC_PERM(x, left1), x);     \
00914   }
00915 
00916   template<class RandomType>
00917   void SFMT19937<RandomType>::Transition(long long count,
00918                                          internal_type statev[])
00919     throw() {
00920     const internal_type magic = width == 32 ?
00921       (vector unsigned)(magic0, magic1, magic2, magic3) :
00922       (vector unsigned)(magic1, magic0, magic3, magic2),
00923       bitleft = (vector unsigned)(18, 18, 18, 18),
00924       bitright = (vector unsigned)(11, 11, 11, 11);
00925     // Shift left and right by 1 byte.  Note that vec_perm(X, Y, P) glues X and
00926     // Y together into a 32-byte quantity and then the 16-byte permutation
00927     // vector P specifies which bytes to put into the 16-byte output.  We
00928     // follow here the convention of using Y = P and using the zero entries in
00929     // P to allow zero bytes to be introduces into the shifted output.  The
00930     // following describes how the left1 table (32-bit version) is produced:
00931     //
00932     // Byte layout of original with LSB ordering
00933     // 33 32 31 30  23 22 21 20  13 12 11 10  03 02 01 00
00934     // shift left by 1 byte (z means zeros enter)
00935     // 32 31 30 23  22 21 20 13  12 11 10 03  02 01 00 zz
00936     //
00937     // Rearrange original to LSB order in 4-byte units
00938     // 03 02 01 00  13 12 11 10  23 22 21 20  33 32 31 30
00939     // with sequential MSB byte indices
00940     // 0  1  2  3   4  5  6  7   8  9 10 11  12 13 14 15
00941     //
00942     // Rearrange shift left verion to LSB order in 4-byte units
00943     // 02 01 00 zz  12 11 10 03  22 21 20 13  32 31 30 23
00944     // with corresponding MSB byte indices
00945     // 1  2  3  z   5  6  7  0   9 10 11  4  13 14 15  8
00946     //
00947     // Replace byte index at x by 16 + index of 0 = 16 + 7 = 23 to give
00948     // 1  2  3 23   5  6  7  0   9 10 11  4  13 14 15  8
00949     const vector unsigned char left1 = width == 32 ?
00950       (vector unsigned char)(1,2,3,23, 5,6,7,0, 9,10,11,4, 13,14,15,8) :
00951       (vector unsigned char)(1,2,3,4,5,6,7,31, 9,10,11,12,13,14,15,0),
00952       right1 = width == 32 ?
00953       (vector unsigned char)(7,0,1,2, 11,4,5,6, 15,8,9,10, 17,12,13,14) :
00954       (vector unsigned char)(15,0,1,2,3,4,5,6, 17,8,9,10,11,12,13,14);
00955     if (count > 0) {
00956       internal_type s = statev[N128 - 2],
00957         r = statev[N128 - 1];
00958       for (; count; --count) {
00959         unsigned i = 0;
00960         for (; i + M128 < N128; ++i) SFMT19937_STEP128(i, i + M128       );
00961         for (; i < N128       ; ++i) SFMT19937_STEP128(i, i + M128 - N128);
00962       }
00963     } else if (count < 0) {
00964       // leftN shifts left by N bytes.
00965       const vector unsigned char left2 = width == 32 ?
00966         (vector unsigned char)(2,3,22,22, 6,7,0,1, 10,11,4,5, 14,15,8,9) :
00967         (vector unsigned char)(2,3,4,5,6,7,30,30, 10,11,12,13,14,15,0,1),
00968         left4 = width == 32 ?
00969         (vector unsigned char)(20,20,20,20, 0,1,2,3, 4,5,6,7, 8,9,10,11) :
00970         (vector unsigned char)(4,5,6,7,28,28,28,28, 12,13,14,15,0,1,2,3),
00971         left8 = (vector unsigned char)(24,24,24,24,24,24,24,24,0,1,2,3,4,5,6,7);
00972       for (; count; ++count) {
00973         unsigned i = N128;
00974         for (; i + M128 > N128;) {
00975           --i; SFMT19937_REVSTEP128(i, i + M128 - N128, i - 2, i - 1);
00976         }
00977         for (; i > 2;) {
00978           --i; SFMT19937_REVSTEP128(i, i + M128, i - 2, i - 1);
00979         }
00980         SFMT19937_REVSTEP128(1, M128 + 1, N128 - 1, 0       ); // i = 1
00981         SFMT19937_REVSTEP128(0, M128    , N128 - 2, N128 - 1); // i = 0
00982       }
00983     }
00984   }
00985 
00986 #undef SFMT19937_STEP128
00987 #undef SFMT19937_REVSTEP128
00988 #undef ALTVEC_PERM
00989 
00990 #else  // neither HAVE_SSE2 or HAVE_ALTIVEC
00991 
00992 #define SFMT19937_STEP32(I, J) {                                \
00993     internal_type t = statev[I] ^ statev[I] << 8 ^              \
00994       (statev[J] >> 11 & magic0) ^                              \
00995       (s0 >> 8 | s1 << 24) ^ r0 << 18;                          \
00996     s0 = r0; r0 = t & mask;                                     \
00997     t = statev[I + 1] ^                                         \
00998       (statev[I + 1] << 8 | statev[I] >> 24) ^                  \
00999       (statev[J + 1] >> 11 & magic1) ^                          \
01000       (s1 >> 8 | s2 << 24) ^ r1 << 18;                          \
01001     s1 = r1; r1 = t & mask;                                     \
01002     t = statev[I + 2] ^                                         \
01003       (statev[I + 2] << 8 | statev[I + 1] >> 24) ^              \
01004       (statev[J + 2] >> 11 & magic2) ^                          \
01005       (s2 >> 8 | s3 << 24) ^ r2 << 18;                          \
01006     s2 = r2; r2 = t & mask;                                     \
01007     t = statev[I + 3] ^                                         \
01008       (statev[I + 3] << 8 | statev[I + 2] >> 24) ^              \
01009       (statev[J + 3] >> 11 & magic3) ^ s3 >> 8 ^ r3 << 18;      \
01010     s3 = r3; r3 = t & mask;                                     \
01011     statev[I    ] = r0; statev[I + 1] = r1;                     \
01012     statev[I + 2] = r2; statev[I + 3] = r3;                     \
01013   }
01014 
01015 #define SFMT19937_REVSTEP32(I, J, K, L) {                       \
01016     internal_type                                               \
01017       t0 = (statev[I] ^ (statev[J] >> 11 & magic0) ^            \
01018             (statev[K] >> 8 | statev[K + 1] << 24) ^            \
01019             statev[L] << 18) & mask,                            \
01020       t1 = (statev[I + 1] ^                                     \
01021             (statev[J + 1] >> 11 & magic1) ^                    \
01022             (statev[K + 1] >> 8 | statev[K + 2] << 24) ^        \
01023             statev[L + 1] << 18) & mask,                        \
01024       t2 = (statev[I + 2] ^                                     \
01025             (statev[J + 2] >> 11 & magic2) ^                    \
01026             (statev[K + 2] >> 8 | statev[K + 3] << 24) ^        \
01027             statev[L + 2] << 18) & mask,                        \
01028       t3 = (statev[I + 3] ^                                     \
01029             (statev[J + 3] >> 11 & magic3) ^                    \
01030             statev[K + 3] >> 8 ^                                \
01031             statev[L + 3] << 18) & mask;                        \
01032     t3 ^= t1; t2 ^= t0; t3 ^= t2; t2 ^= t1; t1 ^= t0;           \
01033     t3 ^= t2 >> 16 | (t3 << 16 & mask);                         \
01034     t2 ^= t1 >> 16 | (t2 << 16 & mask);                         \
01035     t1 ^= t0 >> 16 | (t1 << 16 & mask);                         \
01036     t0 ^=             t0 << 16 & mask;                          \
01037     statev[I    ] = t0 ^             (t0 << 8 & mask);          \
01038     statev[I + 1] = t1 ^ (t0 >> 24 | (t1 << 8 & mask));         \
01039     statev[I + 2] = t2 ^ (t1 >> 24 | (t2 << 8 & mask));         \
01040     statev[I + 3] = t3 ^ (t2 >> 24 | (t3 << 8 & mask));         \
01041  }
01042 
01043   template<>
01044   void SFMT19937<Random_u32>::Transition(long long count,
01045                                          internal_type statev[])
01046     throw() {
01047     if (count > 0) {
01048       // x[i+N] = g(x[i], x[i+M], x[i+N-2], x[i,N-1])
01049       internal_type
01050         s0 = statev[N - 8], s1 = statev[N - 7],
01051         s2 = statev[N - 6], s3 = statev[N - 5],
01052         r0 = statev[N - 4], r1 = statev[N - 3],
01053         r2 = statev[N - 2], r3 = statev[N - 1];
01054       for (; count; --count) {
01055         unsigned i = 0;
01056         for (; i + M < N; i += R) SFMT19937_STEP32(i, i + M    );
01057         for (; i < N    ; i += R) SFMT19937_STEP32(i, i + M - N);
01058       }
01059     } else if (count < 0)
01060       for (; count; ++count) {
01061       unsigned i = N;
01062       for (; i + M > N;) {
01063         i -= R; SFMT19937_REVSTEP32(i, i + M - N, i - 2 * R, i - R);
01064       }
01065       for (; i > 2 * R;) {
01066         i -= R; SFMT19937_REVSTEP32(i, i + M    , i - 2 * R, i - R);
01067       }
01068       SFMT19937_REVSTEP32(R, M + R, N -     R, 0    ); // i = R
01069       SFMT19937_REVSTEP32(0, M    , N - 2 * R, N - R); // i = 0
01070     }
01071   }
01072 
01073 #undef SFMT19937_STEP32
01074 #undef SFMT19937_REVSTEP32
01075 
01076 #define SFMT19937_STEP64(I, J) {                        \
01077     internal_type t = statev[I] ^ statev[I] << 8 ^      \
01078       (statev[J] >> 11 & magic0) ^                      \
01079       (s0 >> 8 | s1 << 56) ^ (r0 << 18 & mask18);       \
01080     s0 = r0; r0 = t & mask;                             \
01081     t = statev[I + 1] ^                                 \
01082       (statev[I + 1] << 8 | statev[I] >> 56) ^          \
01083       (statev[J + 1] >> 11 & magic1) ^                  \
01084       s1 >> 8 ^ (r1 << 18 & mask18);                    \
01085     s1 = r1; r1 = t & mask;                             \
01086     statev[I] = r0; statev[I + 1] = r1;                 \
01087   }
01088 
01089   // In combining the left and right shifts to simulate a 128-bit shift we
01090   // usually use or.  However we can equivalently use xor (e.g., t1 << 8 ^ t0
01091   // >> 56 instead of t1 ^ t1 << 8 | t0 >> 56) and this speeds up the code if
01092   // used in some places.
01093 
01094 #define SFMT19937_REVSTEP64(I, J, K, L) {                       \
01095     internal_type                                               \
01096       t0 = statev[I] ^ (statev[J] >> 11 & magic0) ^             \
01097       (statev[K] >> 8 | (statev[K + 1] << 56 & mask)) ^         \
01098       (statev[L] << 18 & mask18),                               \
01099       t1 = statev[I + 1] ^ (statev[J + 1] >> 11 & magic1) ^     \
01100       statev[K + 1] >> 8 ^ (statev[L + 1] << 18 & mask18);      \
01101     t1 ^= t0;                                                   \
01102     t1 ^= t0 >> 32 ^ (t1 << 32 & mask);                         \
01103     t0 ^=             t0 << 32 & mask;                          \
01104     t1 ^= t0 >> 48 ^ (t1 << 16 & mask);                         \
01105     t0 ^=             t0 << 16 & mask;                          \
01106     statev[I    ] = t0 ^            (t0 << 8 & mask);           \
01107     statev[I + 1] = t1 ^ t0 >> 56 ^ (t1 << 8 & mask);           \
01108  }
01109 
01110   template<>
01111   void SFMT19937<Random_u64>::Transition(long long count,
01112                                          internal_type statev[])
01113     throw() {
01114     // x[i+N] = g(x[i], x[i+M], x[i+N-2], x[i,N-1])
01115     if (count > 0) {
01116       internal_type
01117         s0 = statev[N - 4], s1 = statev[N - 3],
01118         r0 = statev[N - 2], r1 = statev[N - 1];
01119       for (; count; --count) {
01120         unsigned i = 0;
01121         for (; i + M < N; i += R) SFMT19937_STEP64(i, i + M    );
01122         for (; i < N    ; i += R) SFMT19937_STEP64(i, i + M - N);
01123       }
01124     } else if (count < 0)
01125       for (; count; ++count) {
01126         unsigned i = N;
01127         for (; i + M > N;) {
01128           i -= R; SFMT19937_REVSTEP64(i, i + M - N, i - 2 * R, i - R);
01129         }
01130         for (; i > 2 * R;) {
01131           i -= R; SFMT19937_REVSTEP64(i, i + M    , i - 2 * R, i - R);
01132         }
01133         SFMT19937_REVSTEP64(R, M + R, N -     R, 0    ); // i = R
01134         SFMT19937_REVSTEP64(0, M    , N - 2 * R, N - R); // i = 0
01135       }
01136   }
01137 
01138 #undef SFMT19937_STEP64
01139 #undef SFMT19937_REVSTEP64
01140 
01141 #endif  // HAVE_SSE2
01142 
01143   template<>
01144   void SFMT19937<Random_u32>::NormalizeState(engine_type state[]) throw () {
01145     // Carry out the Period Certification for SFMT19937
01146     engine_type inner = (state[0] & PARITY0) ^ (state[1] & PARITY1) ^
01147       (state[2] & PARITY2) ^ (state[3] & PARITY3);
01148     for (unsigned s = 16; s; s >>= 1)
01149       inner ^= inner >> s;
01150     STATIC_ASSERT(PARITY_LSB < 32 && PARITY0 & 1u << PARITY_LSB,
01151                   "inconsistent PARITY_LSB or PARITY0");
01152     // Now inner & 1 is the parity of the number of 1 bits in w_0 & p.
01153     if ((inner & 1u) == 0)
01154       // Change bit of w_0 corresponding to LSB of PARITY
01155       state[PARITY_LSB >> 5] ^= engine_type(1u) << (PARITY_LSB & 31u);
01156   }
01157 
01158   template<>
01159   void SFMT19937<Random_u64>::NormalizeState(engine_type state[]) throw () {
01160     // Carry out the Period Certification for SFMT19937
01161     engine_type inner = (state[0] & PARITY0) ^ (state[1] & PARITY1);
01162     for (unsigned s = 32; s; s >>= 1)
01163       inner ^= inner >> s;
01164     STATIC_ASSERT(PARITY_LSB < 64 && PARITY0 & 1u << PARITY_LSB,
01165                   "inconsistent PARITY_LSB or PARITY0");
01166     // Now inner & 1 is the parity of the number of 1 bits in w_0 & p.
01167     if ((inner & 1u) == 0)
01168       // Change bit of w_0 corresponding to LSB of PARITY
01169       state[PARITY_LSB >> 6] ^= engine_type(1u) << (PARITY_LSB & 63u);
01170   }
01171 
01172   template<class RandomType>
01173   void SFMT19937<RandomType>::CheckState(const engine_type state[],
01174                                          Random_u32::type& check)
01175     throw(std::out_of_range) {
01176     engine_type x = 0;
01177     Random_u32::type c = check;
01178     for (unsigned i = 0; i < N; ++i) {
01179       engine_t::CheckSum(state[i], c);
01180       x |= state[i];
01181     }
01182     if (x == 0)
01183       throw std::out_of_range("SFMT19937: All-zero state");
01184     check = c;
01185   }
01186 
01187   // RandomPower2 implementation
01188 
01189 #if RANDOM_POWERTABLE
01190   // Powers of two.  Just use floats here.  As long as there's no overflow
01191   // or underflow these are exact.  In particular they can be cast to
01192   // doubles or long doubles with no error.
01193   const float RandomPower2::power2[maxpow - minpow + 1] = {
01194 #if RANDOM_LONGDOUBLEPREC > 64
01195     // It would be nice to be able to use the C99 notation of 0x1.0p-120
01196     // for 2^-120 here.
01197     1/1329227995784915872903807060280344576.f, // 2^-120
01198     1/664613997892457936451903530140172288.f, // 2^-119
01199     1/332306998946228968225951765070086144.f, // 2^-118
01200     1/166153499473114484112975882535043072.f, // 2^-117
01201     1/83076749736557242056487941267521536.f, // 2^-116
01202     1/41538374868278621028243970633760768.f, // 2^-115
01203     1/20769187434139310514121985316880384.f, // 2^-114
01204     1/10384593717069655257060992658440192.f, // 2^-113
01205     1/5192296858534827628530496329220096.f, // 2^-112
01206     1/2596148429267413814265248164610048.f, // 2^-111
01207     1/1298074214633706907132624082305024.f, // 2^-110
01208     1/649037107316853453566312041152512.f, // 2^-109
01209     1/324518553658426726783156020576256.f, // 2^-108
01210     1/162259276829213363391578010288128.f, // 2^-107
01211     1/81129638414606681695789005144064.f, // 2^-106
01212     1/40564819207303340847894502572032.f, // 2^-105
01213     1/20282409603651670423947251286016.f, // 2^-104
01214     1/10141204801825835211973625643008.f, // 2^-103
01215     1/5070602400912917605986812821504.f, // 2^-102
01216     1/2535301200456458802993406410752.f, // 2^-101
01217     1/1267650600228229401496703205376.f, // 2^-100
01218     1/633825300114114700748351602688.f, // 2^-99
01219     1/316912650057057350374175801344.f, // 2^-98
01220     1/158456325028528675187087900672.f, // 2^-97
01221     1/79228162514264337593543950336.f, // 2^-96
01222     1/39614081257132168796771975168.f, // 2^-95
01223     1/19807040628566084398385987584.f, // 2^-94
01224     1/9903520314283042199192993792.f, // 2^-93
01225     1/4951760157141521099596496896.f, // 2^-92
01226     1/2475880078570760549798248448.f, // 2^-91
01227     1/1237940039285380274899124224.f, // 2^-90
01228     1/618970019642690137449562112.f, // 2^-89
01229     1/309485009821345068724781056.f, // 2^-88
01230     1/154742504910672534362390528.f, // 2^-87
01231     1/77371252455336267181195264.f, // 2^-86
01232     1/38685626227668133590597632.f, // 2^-85
01233     1/19342813113834066795298816.f, // 2^-84
01234     1/9671406556917033397649408.f, // 2^-83
01235     1/4835703278458516698824704.f, // 2^-82
01236     1/2417851639229258349412352.f, // 2^-81
01237     1/1208925819614629174706176.f, // 2^-80
01238     1/604462909807314587353088.f, // 2^-79
01239     1/302231454903657293676544.f, // 2^-78
01240     1/151115727451828646838272.f, // 2^-77
01241     1/75557863725914323419136.f, // 2^-76
01242     1/37778931862957161709568.f, // 2^-75
01243     1/18889465931478580854784.f, // 2^-74
01244     1/9444732965739290427392.f, // 2^-73
01245     1/4722366482869645213696.f, // 2^-72
01246     1/2361183241434822606848.f, // 2^-71
01247     1/1180591620717411303424.f, // 2^-70
01248     1/590295810358705651712.f,  // 2^-69
01249     1/295147905179352825856.f,  // 2^-68
01250     1/147573952589676412928.f,  // 2^-67
01251     1/73786976294838206464.f,   // 2^-66
01252     1/36893488147419103232.f,   // 2^-65
01253 #endif
01254     1/18446744073709551616.f,   // 2^-64
01255     1/9223372036854775808.f,    // 2^-63
01256     1/4611686018427387904.f,    // 2^-62
01257     1/2305843009213693952.f,    // 2^-61
01258     1/1152921504606846976.f,    // 2^-60
01259     1/576460752303423488.f,     // 2^-59
01260     1/288230376151711744.f,     // 2^-58
01261     1/144115188075855872.f,     // 2^-57
01262     1/72057594037927936.f,      // 2^-56
01263     1/36028797018963968.f,      // 2^-55
01264     1/18014398509481984.f,      // 2^-54
01265     1/9007199254740992.f,       // 2^-53
01266     1/4503599627370496.f,       // 2^-52
01267     1/2251799813685248.f,       // 2^-51
01268     1/1125899906842624.f,       // 2^-50
01269     1/562949953421312.f,        // 2^-49
01270     1/281474976710656.f,        // 2^-48
01271     1/140737488355328.f,        // 2^-47
01272     1/70368744177664.f,         // 2^-46
01273     1/35184372088832.f,         // 2^-45
01274     1/17592186044416.f,         // 2^-44
01275     1/8796093022208.f,          // 2^-43
01276     1/4398046511104.f,          // 2^-42
01277     1/2199023255552.f,          // 2^-41
01278     1/1099511627776.f,          // 2^-40
01279     1/549755813888.f,           // 2^-39
01280     1/274877906944.f,           // 2^-38
01281     1/137438953472.f,           // 2^-37
01282     1/68719476736.f,            // 2^-36
01283     1/34359738368.f,            // 2^-35
01284     1/17179869184.f,            // 2^-34
01285     1/8589934592.f,             // 2^-33
01286     1/4294967296.f,             // 2^-32
01287     1/2147483648.f,             // 2^-31
01288     1/1073741824.f,             // 2^-30
01289     1/536870912.f,              // 2^-29
01290     1/268435456.f,              // 2^-28
01291     1/134217728.f,              // 2^-27
01292     1/67108864.f,               // 2^-26
01293     1/33554432.f,               // 2^-25
01294     1/16777216.f,               // 2^-24
01295     1/8388608.f,                // 2^-23
01296     1/4194304.f,                // 2^-22
01297     1/2097152.f,                // 2^-21
01298     1/1048576.f,                // 2^-20
01299     1/524288.f,                 // 2^-19
01300     1/262144.f,                 // 2^-18
01301     1/131072.f,                 // 2^-17
01302     1/65536.f,                  // 2^-16
01303     1/32768.f,                  // 2^-15
01304     1/16384.f,                  // 2^-14
01305     1/8192.f,                   // 2^-13
01306     1/4096.f,                   // 2^-12
01307     1/2048.f,                   // 2^-11
01308     1/1024.f,                   // 2^-10
01309     1/512.f,                    // 2^-9
01310     1/256.f,                    // 2^-8
01311     1/128.f,                    // 2^-7
01312     1/64.f,                     // 2^-6
01313     1/32.f,                     // 2^-5
01314     1/16.f,                     // 2^-4
01315     1/8.f,                      // 2^-3
01316     1/4.f,                      // 2^-2
01317     1/2.f,                      // 2^-1
01318     1.f,                        // 2^0
01319     2.f,                        // 2^1
01320     4.f,                        // 2^2
01321     8.f,                        // 2^3
01322     16.f,                       // 2^4
01323     32.f,                       // 2^5
01324     64.f,                       // 2^6
01325     128.f,                      // 2^7
01326     256.f,                      // 2^8
01327     512.f,                      // 2^9
01328     1024.f,                     // 2^10
01329     2048.f,                     // 2^11
01330     4096.f,                     // 2^12
01331     8192.f,                     // 2^13
01332     16384.f,                    // 2^14
01333     32768.f,                    // 2^15
01334     65536.f,                    // 2^16
01335     131072.f,                   // 2^17
01336     262144.f,                   // 2^18
01337     524288.f,                   // 2^19
01338     1048576.f,                  // 2^20
01339     2097152.f,                  // 2^21
01340     4194304.f,                  // 2^22
01341     8388608.f,                  // 2^23
01342     16777216.f,                 // 2^24
01343     33554432.f,                 // 2^25
01344     67108864.f,                 // 2^26
01345     134217728.f,                // 2^27
01346     268435456.f,                // 2^28
01347     536870912.f,                // 2^29
01348     1073741824.f,               // 2^30
01349     2147483648.f,               // 2^31
01350     4294967296.f,               // 2^32
01351     8589934592.f,               // 2^33
01352     17179869184.f,              // 2^34
01353     34359738368.f,              // 2^35
01354     68719476736.f,              // 2^36
01355     137438953472.f,             // 2^37
01356     274877906944.f,             // 2^38
01357     549755813888.f,             // 2^39
01358     1099511627776.f,            // 2^40
01359     2199023255552.f,            // 2^41
01360     4398046511104.f,            // 2^42
01361     8796093022208.f,            // 2^43
01362     17592186044416.f,           // 2^44
01363     35184372088832.f,           // 2^45
01364     70368744177664.f,           // 2^46
01365     140737488355328.f,          // 2^47
01366     281474976710656.f,          // 2^48
01367     562949953421312.f,          // 2^49
01368     1125899906842624.f,         // 2^50
01369     2251799813685248.f,         // 2^51
01370     4503599627370496.f,         // 2^52
01371     9007199254740992.f,         // 2^53
01372     18014398509481984.f,        // 2^54
01373     36028797018963968.f,        // 2^55
01374     72057594037927936.f,        // 2^56
01375     144115188075855872.f,       // 2^57
01376     288230376151711744.f,       // 2^58
01377     576460752303423488.f,       // 2^59
01378     1152921504606846976.f,      // 2^60
01379     2305843009213693952.f,      // 2^61
01380     4611686018427387904.f,      // 2^62
01381     9223372036854775808.f,      // 2^63
01382     18446744073709551616.f,     // 2^64
01383   };
01384 #endif
01385 
01386   // RandomEngine (and implicitly RandomAlgorithm and RandomMixer)
01387   // instantiations
01388 
01389 #if RANDOM_LEGACY
01390   template class
01391   RandomEngine<MT19937<Random_u32>, MixerMT0<Random_u32> >;
01392   template class
01393   RandomEngine<MT19937<Random_u64>, MixerMT0<Random_u64> >;
01394   template class
01395   RandomEngine<MT19937<Random_u32>, MixerMT1<Random_u32> >;
01396   template class
01397   RandomEngine<MT19937<Random_u64>, MixerMT1<Random_u64> >;
01398 #endif
01399   template class RandomEngine<  MT19937<Random_u32>, MixerSFMT>;
01400   template class RandomEngine<  MT19937<Random_u64>, MixerSFMT>;
01401   template class RandomEngine<SFMT19937<Random_u32>, MixerSFMT>;
01402   template class RandomEngine<SFMT19937<Random_u64>, MixerSFMT>;
01403 
01404   // RandomCanonial instantiations
01405 
01406   template<> RandomCanonical<MRandomGenerator32>
01407   RandomCanonical<MRandomGenerator32>::Global(std::vector<unsigned long>(0));
01408   template<> RandomCanonical<MRandomGenerator64>
01409   RandomCanonical<MRandomGenerator64>::Global(std::vector<unsigned long>(0));
01410   template<> RandomCanonical<SRandomGenerator32>
01411   RandomCanonical<SRandomGenerator32>::Global(std::vector<unsigned long>(0));
01412   template<> RandomCanonical<SRandomGenerator64>
01413   RandomCanonical<SRandomGenerator64>::Global(std::vector<unsigned long>(0));
01414 
01415 } // namespace RandomLib