RandomLib  1.10
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Friends Macros Pages
Random.cpp
Go to the documentation of this file.
1 /**
2  * \file Random.cpp
3  * \brief Implementation code for %RandomLib
4  *
5  * Copyright (c) Charles Karney (2006-2011) <charles@karney.com> and licensed
6  * under the MIT/X11 License. For more information, see
7  * http://randomlib.sourceforge.net/
8  *
9  * \brief Code for MixerMT0, MixerMT1, MixerSFMT.
10  *
11  * MixerMT0 is adapted from MT19937 (init_by_array) and MT19937_64
12  * (init_by_array64) by Makoto Matsumoto and Takuji Nishimura. See
13  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c and
14  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
15  *
16  * MixerMT1 contains modifications to MixerMT0 by Charles Karney to
17  * correct defects in MixerMT0. This is described in W. E. Brown,
18  * M. Fischler, J. Kowalkowski, M. Paterno, Random Number Generation in C++0X:
19  * A Comprehensive Proposal, version 3, Sept 2006, Sec. 26.4.7.1,
20  * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2079.pdf
21  * This has been replaced in the C++11 standard by MixerSFMT.
22  *
23  * MixerSFMT is adapted from SFMT19937's init_by_array Mutsuo Saito given in
24  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz and
25  * is part of the C++11 standard; see P. Becker, Working Draft, Standard for
26  * Programming Language C++, Oct. 2007, Sec. 26.4.7.1,
27  * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2461.pdf
28  *
29  * The adaption to the C++ is copyright (c) Charles Karney (2006-2011)
30  * <charles@karney.com> and licensed under the MIT/X11 License. For more
31  * information, see http://randomlib.sourceforge.net/
32  *
33  * \brief Code for MT19937<T> and SFMT19937<T>.
34  *
35  * MT19937<T> is adapted from MT19937 and MT19937_64 by Makoto Matsumoto and
36  * Takuji Nishimura. See
37  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c and
38  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
39  *
40  * The code for stepping MT19937 backwards is adapted (and simplified) from
41  * revrand() by Katsumi Hagita. See
42  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/FORTRAN/REVmt19937b.f
43  *
44  * SFMT19937<T> is adapted from SFMT19937 Mutsuo Saito given in
45  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/M062821.pdf and
46  * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz
47  *
48  * The code for stepping SFMT19937 backwards is by Charles Karney.
49  *
50  * The adaption to the C++ is copyright (c) Charles Karney (2006-2011)
51  * <charles@karney.com> and licensed under the MIT/X11 License. For more
52  * information, see http://randomlib.sourceforge.net/
53  **********************************************************************/
54 
55 #define RANDOMLIB_RANDOM_CPP 1
56 
57 /**
58  * Let the header file know that the library is being built.
59  **********************************************************************/
60 #define RANDOMLIB_BUILDING_LIBRARY 1
61 
62 #include <RandomLib/Random.hpp>
63 
64 #if defined(_MSC_VER) || defined(__MINGW32__)
65 #define RANDOMLIB_WINDOWS 1
66 #else
67 #define RANDOMLIB_WINDOWS 0
68 #endif
69 
70 #include <fstream> // For SeedWord reading /dev/urandom
71 #include <ctime> // For SeedWord calling time()
72 #include <sstream> // For formatting in Write32/Read32
73 #include <iomanip> // For formatting in Write32/Read32
74 #if !RANDOMLIB_WINDOWS
75 #include <sys/time.h> // For SeedWord calling gettimeofday
76 #include <unistd.h> // For SeedWord calling getpid(), gethostid()
77 #else
78 #include <windows.h> // For SeedWord calling high prec timer
79 #include <winbase.h>
80 #include <process.h> // For SeedWord calling getpid()
81 #define getpid _getpid
82 #define gmtime_r(t,g) gmtime_s(g,t)
83 #endif
84 
85 #if RANDOMLIB_WINDOWS || defined(__CYGWIN__)
86 #define strtoull strtoul
87 #endif
88 
89 #if defined(_MSC_VER)
90 // Squelch warnings about constant conditional expressions
91 # pragma warning (disable: 4127)
92 #endif
93 
94 namespace RandomLib {
95 
96  // RandomType implementation
97 
98  template<>
99  void Random_u32::Write32(std::ostream& os, bool bin, int& cnt,
100  Random_u32::type x) {
101  if (bin) {
102  unsigned char buf[4];
103  // Use network order -- most significant byte first
104  buf[3] = (unsigned char)(x);
105  buf[2] = (unsigned char)(x >>= 8);
106  buf[1] = (unsigned char)(x >>= 8);
107  buf[0] = (unsigned char)(x >>= 8);
108  os.write(reinterpret_cast<const char *>(buf), 4);
109  } else {
110  const int longsperline = 72/9;
111  // Use hexadecimal to minimize storage together with stringstream to
112  // isolate the effect of changing the base.
113  std::ostringstream str;
114  // No spacing before or after
115  if (cnt > 0)
116  // Newline every longsperline longs
117  str << (cnt % longsperline ? ' ' : '\n');
118  str << std::hex << x;
119  os << str.str();
120  ++cnt;
121  }
122  }
123 
124  template<>
125  void Random_u32::Read32(std::istream& is, bool bin, Random_u32::type& x) {
126  if (bin) {
127  unsigned char buf[4];
128  is.read(reinterpret_cast<char *>(buf), 4);
129  // Use network order -- most significant byte first
130  x = Random_u32::type(buf[0]) << 24 | Random_u32::type(buf[1]) << 16 |
131  Random_u32::type(buf[2]) << 8 | Random_u32::type(buf[3]);
132  } else {
133  std::string s;
134  is >> std::ws >> s;
135  // Use hexadecimal to minimize storage together with stringstream to
136  // isolate the effect of changing the base.
137  std::istringstream str(s);
138  str >> std::hex >> x;
139  }
140  x &= Random_u32::mask;
141  }
142 
143  template<>
144  void Random_u64::Write32(std::ostream& os, bool bin, int& cnt,
145  Random_u64::type x) {
146  Random_u32::Write32(os, bin, cnt, Random_u32::cast(x >> 32));
147  Random_u32::Write32(os, bin, cnt, Random_u32::cast(x ));
148  }
149 
150  template<>
151  void Random_u64::Read32(std::istream& is, bool bin, Random_u64::type& x) {
153  Random_u32::Read32(is, bin, t);
154  x = Random_u64::type(t) << 32;
155  Random_u32::Read32(is, bin, t);
156  x |= Random_u64::type(t);
157  }
158 
159  // RandomSeed implementation
160 
162  // Check that the assumptions made about the capabilities of the number
163  // system are valid.
164  STATIC_ASSERT(std::numeric_limits<seed_type>::radix == 2 &&
165  !std::numeric_limits<seed_type>::is_signed &&
166  std::numeric_limits<seed_type>::digits >= 32,
167  "seed_type is a bad type");
168  u32::type t = 0;
169  // Linux has /dev/urandom to initialize the seed randomly. (Use
170  // /dev/urandom instead of /dev/random because it does not block.)
171  {
172  std::ifstream f("/dev/urandom", std::ios::binary | std::ios::in);
173  if (f.good()) {
174  // Read 32 bits from /dev/urandom
175  f.read(reinterpret_cast<char *>(&t), sizeof(t));
176  }
177  }
178  std::vector<seed_type> v = SeedVector();
179  for (size_t i = v.size(); i--;)
180  u32::CheckSum(u32::type(v[i]), t);
181  return seed_t::cast(t);
182  }
183 
184  std::vector<RandomSeed::seed_type> RandomSeed::SeedVector() {
185  std::vector<seed_type> v;
186  {
187  // fine-grained timer
188 #if !RANDOMLIB_WINDOWS
189  timeval tv;
190  if (gettimeofday(&tv, 0) == 0)
191  v.push_back(seed_t::cast(tv.tv_usec));
192 #else
193  LARGE_INTEGER taux;
194  if (QueryPerformanceCounter((LARGE_INTEGER *)&taux)) {
195  v.push_back(seed_t::cast(taux.LowPart));
196  v.push_back(seed_t::cast(taux.HighPart));
197  }
198 #endif
199  }
200  // seconds
201  const time_t tim = std::time(0);
202  v.push_back(seed_t::cast(seed_type(tim)));
203  // PID
204  v.push_back(seed_t::cast(getpid()));
205 #if !RANDOMLIB_WINDOWS
206  // host ID
207  v.push_back(seed_t::cast(gethostid()));
208 #endif
209  {
210  // year
211 #if !defined(__MINGW32__)
212  tm gt;
213  gmtime_r(&tim, &gt);
214  v.push_back((seed_type(1900) + seed_t::cast(gt.tm_year)));
215 #else
216  tm* gt = gmtime(&tim);
217  v.push_back((seed_type(1900) + seed_t::cast(gt->tm_year)));
218 #endif
219  }
220  // Candidates for additional elements:
221  // ip address(es) of computer, thread index.
222  std::transform(v.begin(), v.end(), v.begin(), seed_t::cast<seed_type>);
223  return v;
224  }
225 
226  std::vector<RandomSeed::seed_type>
227  RandomSeed::StringToVector(const std::string& s) {
228  std::vector<seed_type> v(0);
229  const char* c = s.c_str();
230  char* q;
231  std::string::size_type p = 0;
232  while (true) {
233  p = s.find_first_of("0123456789", p);
234  if (p == std::string::npos)
235  break;
236  v.push_back(seed_t::cast(std::strtoull(c + p, &q, 0)));
237  p = q - c;
238  }
239  return v;
240  }
241 
242  // RandomEngine implementation
243 
244  template<class Algorithm, class Mixer>
245  void RandomEngine<Algorithm, Mixer>::Init() throw() {
246  // On exit we have _ptr == N.
247 
248  STATIC_ASSERT(std::numeric_limits<typename mixer_t::type>::radix == 2 &&
249  !std::numeric_limits<typename mixer_t::type>::is_signed &&
250  std::numeric_limits<typename mixer_t::type>::digits >=
251  int(mixer_t::width),
252  "mixer_type is a bad type");
253 
254  STATIC_ASSERT(std::numeric_limits<result_type>::radix == 2 &&
255  !std::numeric_limits<result_type>::is_signed &&
256  std::numeric_limits<result_type>::digits >= width,
257  "engine_type is a bad type");
258 
259  STATIC_ASSERT(mixer_t::width == 32 || mixer_t::width == 64,
260  "Mixer width must be 32 or 64");
261 
262  STATIC_ASSERT(width == 32 || width == 64,
263  "Algorithm width must be 32 or 64");
264 
265  // If the bit-widths are the same then the data sizes must be the same.
266  STATIC_ASSERT(!(mixer_t::width == width) ||
267  sizeof(_stateu) == sizeof(_state),
268  "Same bit-widths but different storage");
269 
270  // Repacking assumes that narrower data type is at least as wasteful than
271  // the broader one.
272  STATIC_ASSERT(!(mixer_t::width < width) ||
273  sizeof(_stateu) >= sizeof(_state),
274  "Narrow data type uses less storage");
275 
276  STATIC_ASSERT(!(mixer_t::width > width) ||
277  sizeof(_stateu) <= sizeof(_state),
278  "Narrow data type uses less storage");
279 
280  // Require that _statev and _state are aligned since no repacking is done
281  // when calling Transition
282  STATIC_ASSERT(sizeof(_statev) == sizeof(_state),
283  "Storage mismatch with internal engine data type");
284 
285  // Convert the seed into state
286  Mixer::SeedToState(_seed, _stateu, NU);
287 
288  // Pack into _state
289  if (mixer_t::width < width) {
290  for (size_t i = 0; i < N; ++i)
291  // Assume 2:1 LSB packing
292  _state[i] = result_type(_stateu[2*i]) |
293  result_type(_stateu[2*i + 1]) <<
294  (mixer_t::width < width ? mixer_t::width : 0);
295  } else if (mixer_t::width > width) {
296  for (size_t i = N; i--;)
297  // Assume 1:2 LSB packing
298  _state[i] = result_t::cast(_stateu[i>>1] >> width * (i&1u));
299  } // Otherwise the union takes care of it
300 
301  Algorithm::NormalizeState(_state);
302 
303  _rounds = -1;
304  _ptr = N;
305  }
306 
307  template<class Algorithm, class Mixer> Random_u32::type
308  RandomEngine<Algorithm, Mixer>::Check(u64::type v, u32::type e,
309  u32::type m) const {
310  if (v != version)
311  throw RandomErr(Name() + ": Unknown version");
312  if (e != Algorithm::version)
313  throw RandomErr(Name() + ": Algorithm mismatch");
314  if (m != Mixer::version)
315  throw RandomErr(Name() + ": Mixer mismatch");
316  u32::type check = 0;
317  u64::CheckSum(v, check);
318  u32::CheckSum(e, check);
319  u32::CheckSum(m, check);
320  u32::CheckSum(u32::type(_seed.size()), check);
321  for (std::vector<seed_type>::const_iterator n = _seed.begin();
322  n != _seed.end(); ++n) {
323  if (*n != seed_t::cast(*n))
324  throw RandomErr(Name() + ": Illegal seed value");
325  u32::CheckSum(u32::type(*n), check);
326  }
327  u32::CheckSum(_ptr, check);
328  if (_stride == 0 || _stride > UNINIT/2)
329  throw RandomErr(Name() + ": Invalid stride");
330  u32::CheckSum(_stride, check);
331  if (_ptr != UNINIT) {
332  if (_ptr >= N + _stride)
333  throw RandomErr(Name() + ": Invalid pointer");
334  u64::CheckSum(_rounds, check);
335  Algorithm::CheckState(_state, check);
336  }
337  return check;
338  }
339 
340  template<typename Algorithm, typename Mixer>
341  RandomEngine<Algorithm, Mixer>::RandomEngine(std::istream& is, bool bin) {
342  u64::type versionr;
343  u32::type versione, versionm, t;
344  u64::Read32(is, bin, versionr);
345  u32::Read32(is, bin, versione);
346  u32::Read32(is, bin, versionm);
347  u32::Read32(is, bin, t);
348  _seed.resize(size_t(t));
349  for (std::vector<seed_type>::iterator n = _seed.begin();
350  n != _seed.end(); ++n) {
351  u32::Read32(is, bin, t);
352  *n = seed_type(t);
353  }
354  u32::Read32(is, bin, t);
355  // Don't need to worry about sign extension because _ptr is unsigned.
356  _ptr = unsigned(t);
357  u32::Read32(is, bin, t);
358  _stride = unsigned(t);
359  if (_ptr != UNINIT) {
360  u64::type p;
361  u64::Read32(is, bin, p);
362  _rounds = (long long)(p);
363  // Sign extension in case long long is bigger than 64 bits.
364  _rounds <<= 63 - std::numeric_limits<long long>::digits;
365  _rounds >>= 63 - std::numeric_limits<long long>::digits;
366  for (unsigned i = 0; i < N; ++i)
367  result_t::Read32(is, bin, _state[i]);
368  }
369  u32::Read32(is, bin, t);
370  if (t != Check(versionr, versione, versionm))
371  throw RandomErr(Name() + ": Checksum failure");
372  }
373 
374  template<typename Algorithm, typename Mixer>
376  bool bin) const {
377  u32::type check = Check(version, Algorithm::version, Mixer::version);
378  int c = 0;
379  u64::Write32(os, bin, c, version);
380  u32::Write32(os, bin, c, Algorithm::version);
381  u32::Write32(os, bin, c, Mixer::version);
382  u32::Write32(os, bin, c, u32::type(_seed.size()));
383  for (std::vector<seed_type>::const_iterator n = _seed.begin();
384  n != _seed.end(); ++n)
385  u32::Write32(os, bin, c, u32::type(*n));
386  u32::Write32(os, bin, c, _ptr);
387  u32::Write32(os, bin, c, _stride);
388  if (_ptr != UNINIT) {
389  u64::Write32(os, bin, c, u64::type(_rounds));
390  for (unsigned i = 0; i < N; ++i)
391  result_t::Write32(os, bin, c, _state[i]);
392  }
393  u32::Write32(os, bin, c, check);
394  }
395 
396  template<typename Algorithm, typename Mixer>
397  void RandomEngine<Algorithm, Mixer>::StepCount(long long n) throw() {
398  // On exit we have 0 <= _ptr <= N.
399  if (_ptr == UNINIT)
400  Init();
401  const long long ncount = n + Count(); // new Count()
402  long long nrounds = ncount / N;
403  int nptr = int(ncount - nrounds * N);
404  // We pick _ptr = N or _ptr = 0 depending on which choice involves the
405  // least work. We thus avoid doing one (potentially unneeded) call to
406  // Transition.
407  if (nptr < 0) {
408  --nrounds;
409  nptr += N;
410  } else if (nptr == 0 && nrounds > _rounds) {
411  nptr = N;
412  --nrounds;
413  }
414  if (nrounds != _rounds)
415  Algorithm::Transition(nrounds - _rounds, _statev);
416  _rounds = nrounds;
417  _ptr = nptr;
418  }
419 
420  template<typename Algorithm, typename Mixer>
422  RandomEngine g(std::vector<seed_type>(0));
423  g.SetCount(10000-1);
424  result_type x = g();
425  if (SelfTestResult(0) && x != SelfTestResult(1))
426  throw RandomErr(Name() + ": Incorrect result with seed " +
427  g.SeedString());
428  seed_type s[] = {0x1234U, 0x5678U, 0x9abcU, 0xdef0U};
429  // seed_type s[] = {1, 2, 3, 4};
430  g.Reseed(s, s+4);
431  g.StepCount(-20000);
432  std::string save;
433  {
434  std::ostringstream stream;
435  stream << g << "\n";
436  save = stream.str();
437  }
438  g.Reset();
439  {
440  std::istringstream stream(save);
441  stream >> g;
442  }
443  g.SetCount(10000);
444  {
445  std::ostringstream stream;
446  g.Save(stream, true);
447  save = stream.str();
448  }
449  {
450  std::istringstream stream(save);
451  RandomEngine h(std::vector<seed_type>(0));
452  h.Load(stream, true);
453  h.SetCount(1000000-1);
454  x = h();
455  if (SelfTestResult(0) && x != SelfTestResult(2))
456  throw RandomErr(Name() + ": Incorrect result with seed " +
457  h.SeedString());
458  g.SetCount(1000000);
459  if (h != g)
460  throw RandomErr(Name() + ": Comparison failure");
461  }
462  }
463 
464  template<> Random_u32::type
466  SelfTestResult(unsigned i) throw() {
467  return i == 0 ? 1 :
468  i == 1 ? 4123659995UL : 3016432305UL;
469  }
470 
471  template<> Random_u64::type
472  RandomEngine<MT19937<Random_u64>, MixerMT0<Random_u64> >::
473  SelfTestResult(unsigned i) throw() {
474  return i == 0 ? 1 :
475  i == 1 ? 9981545732273789042ULL : 1384037754719008581ULL;
476  }
477 
478  template<> Random_u32::type
479  RandomEngine<MT19937<Random_u32>, MixerMT1<Random_u32> >::
480  SelfTestResult(unsigned i) throw() {
481  return i == 0 ? 1 :
482  i == 1 ? 4123659995UL : 2924523180UL;
483  }
484 
485  template<> Random_u64::type
486  RandomEngine<MT19937<Random_u64>, MixerMT1<Random_u64> >::
487  SelfTestResult(unsigned i) throw() {
488  return i == 0 ? 1 :
489  i == 1 ? 9981545732273789042ULL : 5481486777409645478ULL;
490  }
491 
492  template<> Random_u32::type
493  RandomEngine<MT19937<Random_u32>, MixerSFMT>::
494  SelfTestResult(unsigned i) throw() {
495  return i == 0 ? 1 :
496  i == 1 ? 666528879UL : 2183745132UL;
497  }
498 
499  template<> Random_u64::type
500  RandomEngine<MT19937<Random_u64>, MixerSFMT>::
501  SelfTestResult(unsigned i) throw() {
502  return i == 0 ? 1 :
503  i == 1 ? 12176471137395770412ULL : 66914054428611861ULL;
504  }
505 
506  template<> Random_u32::type
507  RandomEngine<SFMT19937<Random_u32>, MixerSFMT>::
508  SelfTestResult(unsigned i) throw() {
509  return i == 0 ? 1 :
510  i == 1 ? 2695024307UL : 782200760UL;
511  }
512 
513  template<> Random_u64::type
514  RandomEngine<SFMT19937<Random_u64>, MixerSFMT>::
515  SelfTestResult(unsigned i) throw() {
516  return i == 0 ? 1 :
517  i == 1 ? 1464461649847485149ULL : 5050640804923595109ULL;
518  }
519 
520  // RandomMixer implementation
521 
522  template<class RandomType> void MixerMT0<RandomType>::
523  SeedToState(const std::vector<RandomSeed::seed_type>& seed,
524  mixer_type state[], unsigned n) throw() {
525  // Adapted from
526  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
527  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
528  const unsigned s = unsigned(seed.size());
529  const unsigned w = mixer_t::width;
530 
531  mixer_type r = s ? a1 + mixer_type(0) : a0 + mixer_type(0);
532  state[0] = r;
533  for (unsigned k = 1; k < n; ++k) {
534  r = b * (r ^ r >> (w - 2)) + k;
535  r &= mask;
536  state[k] = r;
537  }
538  if (s > 0) {
539  const unsigned m = mixer_t::width / 32,
540  s2 = (s + m - 1)/m;
541  unsigned i1 = 1;
542  r = state[0];
543  for (unsigned k = (n > s2 ? n : s2), j = 0;
544  k; --k, i1 = i1 == n - 1 ? 1 : i1 + 1, // i1 = i1 + 1 mod n - 1
545  j = j == s2 - 1 ? 0 : j + 1 ) { // j = j+1 mod s2
546  r = state[i1] ^ c * (r ^ r >> (w - 2));
547  r += j + mixer_type(seed[m * j]) +
548  (m == 1 || 2 * j + 1 == s ? mixer_type(0) :
549  mixer_type(seed[m * j + 1]) << (w - 32));
550  r &= mask;
551  state[i1] = r;
552  }
553  for (unsigned k = n - 1; k; --k,
554  i1 = i1 == n - 1 ? 1 : i1 + 1) { // i1 = i1 + 1 mod n - 1
555  r = state[i1] ^ d * (r ^ r >> (w - 2));
556  r -= i1;
557  r &= mask;
558  state[i1] = r;
559  }
560  state[0] = typename mixer_t::type(1) << (w - 1);
561  }
562  }
563 
564  template<class RandomType> void MixerMT1<RandomType>::
565  SeedToState(const std::vector<RandomSeed::seed_type>& seed,
566  mixer_type state[], unsigned n) throw() {
567  // This is the algorithm given in the seed_seq class described in
568  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2079.pdf It is
569  // a modification of
570  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
571  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/VERSIONS/C-LANG/mt19937-64.c
572  const unsigned s = unsigned(seed.size());
573  const unsigned w = mixer_t::width;
574 
575  mixer_type r = (a + s) & mask;
576  state[0] = r;
577  for (unsigned k = 1; k < n; ++k) {
578  r = b * (r ^ r >> (w - 2)) + k;
579  r &= mask;
580  state[k] = r;
581  }
582  if (s > 0) {
583  const unsigned m = mixer_t::width / 32,
584  s2 = (s + m - 1)/m;
585  unsigned i1 = 0;
586  for (unsigned k = (n > s2 ? n : s2), j = 0;
587  k; --k, i1 = i1 == n - 1 ? 0 : i1 + 1, // i1 = i1 + 1 mod n
588  j = j == s2 - 1 ? 0 : j + 1 ) { // j = j+1 mod s2
589  r = state[i1] ^ c * (r ^ r >> (w - 2));
590  r += j + mixer_type(seed[m * j]) +
591  (m == 1 || 2 * j + 1 == s ? mixer_type(0) :
592  mixer_type(seed[m * j + 1]) << (w - 32));
593  r &= mask;
594  state[i1] = r;
595  }
596  for (unsigned k = n; k; --k,
597  i1 = i1 == n - 1 ? 0 : i1 + 1) { // i1 = i1 + 1 mod n
598  r = state[i1] ^ d * (r ^ r >> (w - 2));
599  r -= i1;
600  r &= mask;
601  state[i1] = r;
602  }
603  }
604  }
605 
606  void MixerSFMT::SeedToState(const std::vector<RandomSeed::seed_type>& seed,
607  mixer_type state[], unsigned n) throw() {
608  // This is adapted from the routine init_by_array by Mutsuo Saito given in
609  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/SFMT-src-1.2.tar.gz
610 
611  if (n == 0)
612  return; // Nothing to do
613 
614  const unsigned s = unsigned(seed.size()),
615  // Add treatment of small n with lag = (n - 1)/2 for n <= 7. In
616  // particular, the first operation (xor or plus) in each for loop
617  // involves three distinct indices for n > 2.
618  lag = n >= 623 ? 11 : (n >= 68 ? 7 : (n >= 39 ? 5 :
619  (n >= 7 ? 3 : (n - 1)/2))),
620  // count = max( s + 1, n )
621  count = s + 1 > n ? s + 1 : n;
622 
623  std::fill(state, state + n, mixer_type(a));
624  const unsigned w = mixer_t::width;
625 
626  unsigned i = 0, k = (n - lag) / 2, l = k + lag;
627  mixer_type r = state[n - 1];
628  for (unsigned j = 0; j < count; ++j,
629  i = i == n - 1 ? 0 : i + 1,
630  k = k == n - 1 ? 0 : k + 1,
631  l = l == n - 1 ? 0 : l + 1) {
632  // Here r = state[(j - 1) mod n]
633  // i = j mod n
634  // k = (j + (n - lag)/2) mod n
635  // l = (j + (n - lag)/2 + lag) mod n
636  r ^= state[i] ^ state[k];
637  r &= mask;
638  r = b * (r ^ r >> (w - 5));
639  state[k] += r;
640  r += i + (j > s ? 0 : (j ? mixer_type(seed[j - 1]) : s));
641  state[l] += r;
642  state[i] = r;
643  }
644 
645  for (unsigned j = n; j; --j,
646  i = i == n - 1 ? 0 : i + 1,
647  k = k == n - 1 ? 0 : k + 1,
648  l = l == n - 1 ? 0 : l + 1) {
649  // Here r = state[(i - 1) mod n]
650  // k = (i + (n - lag)/2) mod n
651  // l = (i + (n - lag)/2 + lag) mod n
652  r += state[i] + state[k];
653  r &= mask;
654  r = c * (r ^ r >> (w - 5));
655  r &= mask;
656  state[k] ^= r;
657  r -= i;
658  r &= mask;
659  state[l] ^= r;
660  state[i] = r;
661  }
662  }
663 
664  // RandomAlgorithm implementation
665 
666  // Here, input is I, J = I + 1, K = I + M; output is I = I + N (mod N)
667 
668 #define MT19937_STEP(I, J, K) statev[I] = statev[K] ^ \
669  (statev[J] & engine_type(1) ? magic : engine_type(0)) ^ \
670  ((statev[I] & upper) | (statev[J] & lower)) >> 1
671 
672  // The code is cleaned up a little from Hagita's Fortran version by getting
673  // rid of the unnecessary masking by YMASK and by using simpler logic to
674  // restore the correct value of _state[0].
675  //
676  // Here input is J = I + N - 1, K = I + M - 1, and p = y[I] (only the high
677  // bits are used); output _state[I] and p = y[I - 1].
678 
679 #define MT19937_REVSTEP(I, J, K) { \
680  engine_type q = statev[J] ^ statev[K], s = q >> (width - 1); \
681  q = (q ^ (s ? magic : engine_type(0))) << 1 | s; \
682  statev[I] = (p & upper) | (q & lower); \
683  p = q; \
684  }
685 
686  template<class RandomType>
687  void MT19937<RandomType>::Transition(long long count, internal_type statev[])
688  throw() {
689  if (count > 0)
690  for (; count; --count) {
691  // This ONLY uses high bit of statev[0]
692  unsigned i = 0;
693  for (; i < N - M; ++i) MT19937_STEP(i, i + 1, i + M );
694  for (; i < N - 1; ++i) MT19937_STEP(i, i + 1, i + M - N);
695  MT19937_STEP(N - 1, 0, M - 1); // i = N - 1
696  }
697  else if (count < 0)
698  for (; count; ++count) {
699  // This ONLY uses high bit of statev[0]
700  engine_type p = statev[0];
701  // Fix low bits of statev[0] and compute y[-1]
702  MT19937_REVSTEP(0, N - 1, M - 1); // i = N
703  unsigned i = N - 1;
704  for (; i > N - M; --i) MT19937_REVSTEP(i, i - 1, i + M - 1 - N);
705  for (; i ; --i) MT19937_REVSTEP(i, i - 1, i + M - 1 );
706  MT19937_REVSTEP(0, N - 1, M - 1); // i = 0
707  }
708  }
709 
710 #undef MT19937_STEP
711 #undef MT19937_REVSTEP
712 
713  template<class RandomType>
714  void MT19937<RandomType>::NormalizeState(engine_type state[]) throw() {
715 
716  // Perform the MT-specific sanity check on the resulting state ensuring
717  // that the significant 19937 bits are not all zero.
718  state[0] &= upper; // Mask out unused bits
719  unsigned i = 0;
720  while (i < N && state[i] == 0)
721  ++i;
722  if (i >= N)
723  state[0] = engine_type(1) << (width - 1); // with prob 2^-19937
724 
725  // This sets the low R bits of _state[0] consistent with the rest of the
726  // state. Needed to handle SetCount(-N); Ran32(); immediately following
727  // reseeding. This wasn't required in the original code because a
728  // Transition was always done first.
729  engine_type q = state[N - 1] ^ state[M - 1], s = q >> (width - 1);
730  q = (q ^ (s ? magic : engine_type(0))) << 1 | s;
731  state[0] = (state[0] & upper) | (q & lower);
732  }
733 
734  template<class RandomType>
735  void MT19937<RandomType>::CheckState(const engine_type state[],
736  Random_u32::type& check) {
737  engine_type x = 0;
738  Random_u32::type c = check;
739  for (unsigned i = 0; i < N; ++i) {
740  engine_t::CheckSum(state[i], c);
741  x |= state[i];
742  }
743  if (x == 0)
744  throw RandomErr("MT19937: All-zero state");
745 
746  // There are only width*(N-1) + 1 = 19937 independent bits of state. Thus
747  // the low width-1 bits of _state[0] are derivable from the other bits in
748  // state. Verify that the redundant bits bits are consistent.
749  engine_type q = state[N - 1] ^ state[M - 1], s = q >> (width - 1);
750  q = (q ^ (s ? magic : engine_type(0))) << 1 | s;
751  if ((q ^ state[0]) & lower)
752  throw RandomErr("MT19937: Invalid state");
753 
754  check = c;
755  }
756 
757 #if defined(HAVE_SSE2) && HAVE_SSE2
758 
759  // Transition is from Saito's Master's Thesis
760  // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/M062821.pdf
761  //
762  // This implements
763  //
764  // w_{i+N} = w_i A xor w_M B xor w_{i+N-2} C xor w_{i+N-1} D
765  //
766  // where w_i is a 128-bit word and
767  //
768  // w A = (w << 8)_128 xor w
769  // w B = (w >> 11)_32 & MSK
770  // w C = (w >> 8)_128
771  // w D = (w << 18)_32
772  //
773  // Here the _128 means shift is of whole 128-bit word. _32 means the shifts
774  // are independently done on each 32-bit word.
775  //
776  // In SFMT19937_STEP32 and SFMT19937_STEP64 input is I, J = I + M and output
777  // is I = I + N (mod N). On input, s and r give state for I + N - 2 and I +
778  // N - 1; on output s and r give state for I + N - 1 and I + N. The
779  // implementation of 128-bit operations is open-coded in a portable fashion
780  // (with LSB ordering).
781  //
782  // N.B. Here N and M are the lags in units of BitWidth words and so are 4
783  // (for u32 implementation) or 2 (for u64 implementation) times bigger than
784  // defined in Saito's thesis.
785 
786  // This is adapted from SFMT-sse.c in the SFMT 1.2 distribution.
787  // The order of instructions has been rearranged to increase the
788  // speed slightly
789 
790 #define SFMT19937_STEP128(I, J) { \
791  internal_type x = _mm_load_si128(statev + I), \
792  y = _mm_srli_epi32(statev[J], 11), \
793  z = _mm_srli_si128(s, 1); \
794  s = _mm_slli_epi32(r, 18); \
795  z = _mm_xor_si128(z, x); \
796  x = _mm_slli_si128(x, 1); \
797  z = _mm_xor_si128(z, s); \
798  y = _mm_and_si128(y, m); \
799  z = _mm_xor_si128(z, x); \
800  s = r; \
801  r = _mm_xor_si128(z, y); \
802  _mm_store_si128(statev + I, r); \
803  }
804 
805  // This undoes SFMT19937_STEP. Trivially, we have
806  //
807  // w_i A = w_{i+N} xor w_{i+M} B xor w_{i+N-2} C xor w_{i+N-1} D
808  //
809  // Given w_i A we can determine w_i from the observation that A^16 =
810  // identity, thus
811  //
812  // w_i = (w_i A) A^15
813  //
814  // Because x A^(2^n) = x << (8*2^n) xor x, the operation y = x A^15 can be
815  // implemented as
816  //
817  // y' = (x << 64)_128 xor x = x A^8
818  // y'' = (y' << 32)_128 xor y' = y' A^4 = x A^12
819  // y''' = (y'' << 16)_128 xor y'' = y'' A^2 = x A^14
820  // y = (y''' << 8)_128 xor y''' = y''' A = x A^15
821  //
822  // Here input is I = I + N, J = I + M, K = I + N - 2, L = I + N -1, and
823  // output is I = I.
824  //
825  // This is about 15-35% times slower than SFMT19937_STEPNN, because (1) there
826  // doesn't appear to be a straightforward way of saving intermediate results
827  // across calls as with SFMT19937_STEPNN and (2) w A^15 is slower to compute
828  // than w A.
829 
830 #define SFMT19937_REVSTEP128(I, J, K, L) { \
831  internal_type x = _mm_load_si128(statev + I), \
832  y = _mm_srli_epi32(statev[J], 11), \
833  z = _mm_slli_epi32(statev[L], 18); \
834  y = _mm_and_si128(y, m); \
835  x = _mm_xor_si128(x, _mm_srli_si128(statev[K], 1)); \
836  x = _mm_xor_si128(x, z); \
837  x = _mm_xor_si128(x, y); \
838  x = _mm_xor_si128(_mm_slli_si128(x, 8), x); \
839  x = _mm_xor_si128(_mm_slli_si128(x, 4), x); \
840  x = _mm_xor_si128(_mm_slli_si128(x, 2), x); \
841  x = _mm_xor_si128(_mm_slli_si128(x, 1), x); \
842  _mm_store_si128(statev + I, x); \
843  }
844 
845  template<class RandomType>
846  void SFMT19937<RandomType>::Transition(long long count,
847  internal_type statev[])
848  throw() {
849  const internal_type m = _mm_set_epi32(magic3, magic2, magic1, magic0);
850  if (count > 0) {
851  internal_type s = _mm_load_si128(statev + N128 - 2),
852  r = _mm_load_si128(statev + N128 - 1);
853  for (; count; --count) {
854  unsigned i = 0;
855  for (; i + M128 < N128; ++i) SFMT19937_STEP128(i, i + M128 );
856  for (; i < N128 ; ++i) SFMT19937_STEP128(i, i + M128 - N128);
857  }
858  } else if (count < 0)
859  for (; count; ++count) {
860  unsigned i = N128;
861  for (; i + M128 > N128;) {
862  --i; SFMT19937_REVSTEP128(i, i + M128 - N128, i - 2, i - 1);
863  }
864  for (; i > 2;) {
865  --i; SFMT19937_REVSTEP128(i, i + M128, i - 2, i - 1);
866  }
867  SFMT19937_REVSTEP128(1, M128 + 1, N128 - 1, 0 ); // i = 1
868  SFMT19937_REVSTEP128(0, M128 , N128 - 2, N128 - 1); // i = 0
869  }
870  }
871 
872 #undef SFMT19937_STEP128
873 #undef SFMT19937_REVSTEP128
874 
875 #elif defined(HAVE_ALTIVEC) && HAVE_ALTIVEC
876 
877  // The Altivec versions of SFMT19937_{,REV}STEP128 are simply translated from
878  // the SSE2 versions. The only significant differences arise because of the
879  // MSB ordering of the PowerPC. This means that the 32-bit and 64-bit
880  // versions are no different because 32-bit and 64-bit words don't pack
881  // together in the same way as on an SSE2 machine (see the two definitions of
882  // magic). This also means that the 128-bit byte shifts on an LSB machine
883  // change into more complicated byte permutations.
884 
885 #define ALTIVEC_PERM(X, P) vec_perm(X, P, P)
886 
887 #define SFMT19937_STEP128(I, J) { \
888  internal_type x = statev[I], \
889  z = vec_xor(vec_xor(ALTIVEC_PERM(s, right1), x), \
890  vec_sl(r, bitleft)); \
891  s = r; \
892  r = vec_xor(z, \
893  vec_xor(ALTIVEC_PERM(x, left1), \
894  vec_and(vec_sr(statev[J], bitright), \
895  magic))); \
896  statev[I] = r; \
897  }
898 
899 #define SFMT19937_REVSTEP128(I, J, K, L) { \
900  internal_type x = statev[I], \
901  y = vec_sr(statev[J], bitright), \
902  z = vec_sl(statev[L], bitleft); \
903  y = vec_and(y, magic); \
904  x = vec_xor(x, ALTIVEC_PERM(statev[K], right1)); \
905  x = vec_xor(x, z); \
906  x = vec_xor(x, y); \
907  x = vec_xor(ALTIVEC_PERM(x, left8), x); \
908  x = vec_xor(ALTIVEC_PERM(x, left4), x); \
909  x = vec_xor(ALTIVEC_PERM(x, left2), x); \
910  statev[I] = vec_xor(ALTIVEC_PERM(x, left1), x); \
911  }
912 
913  template<class RandomType>
914  void SFMT19937<RandomType>::Transition(long long count,
915  internal_type statev[])
916  throw() {
917  const internal_type magic = width == 32 ?
918  (vector unsigned)(magic0, magic1, magic2, magic3) :
919  (vector unsigned)(magic1, magic0, magic3, magic2),
920  bitleft = (vector unsigned)(18, 18, 18, 18),
921  bitright = (vector unsigned)(11, 11, 11, 11);
922  // Shift left and right by 1 byte. Note that vec_perm(X, Y, P) glues X and
923  // Y together into a 32-byte quantity and then the 16-byte permutation
924  // vector P specifies which bytes to put into the 16-byte output. We
925  // follow here the convention of using Y = P and using the zero entries in
926  // P to allow zero bytes to be introduces into the shifted output. The
927  // following describes how the left1 table (32-bit version) is produced:
928  //
929  // Byte layout of original with LSB ordering
930  // 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
931  // shift left by 1 byte (z means zeros enter)
932  // 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 zz
933  //
934  // Rearrange original to LSB order in 4-byte units
935  // 03 02 01 00 13 12 11 10 23 22 21 20 33 32 31 30
936  // with sequential MSB byte indices
937  // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
938  //
939  // Rearrange shift left verion to LSB order in 4-byte units
940  // 02 01 00 zz 12 11 10 03 22 21 20 13 32 31 30 23
941  // with corresponding MSB byte indices
942  // 1 2 3 z 5 6 7 0 9 10 11 4 13 14 15 8
943  //
944  // Replace byte index at x by 16 + index of 0 = 16 + 7 = 23 to give
945  // 1 2 3 23 5 6 7 0 9 10 11 4 13 14 15 8
946  const vector unsigned char left1 = width == 32 ?
947  (vector unsigned char)(1,2,3,23, 5,6,7,0, 9,10,11,4, 13,14,15,8) :
948  (vector unsigned char)(1,2,3,4,5,6,7,31, 9,10,11,12,13,14,15,0),
949  right1 = width == 32 ?
950  (vector unsigned char)(7,0,1,2, 11,4,5,6, 15,8,9,10, 17,12,13,14) :
951  (vector unsigned char)(15,0,1,2,3,4,5,6, 17,8,9,10,11,12,13,14);
952  if (count > 0) {
953  internal_type s = statev[N128 - 2],
954  r = statev[N128 - 1];
955  for (; count; --count) {
956  unsigned i = 0;
957  for (; i + M128 < N128; ++i) SFMT19937_STEP128(i, i + M128 );
958  for (; i < N128 ; ++i) SFMT19937_STEP128(i, i + M128 - N128);
959  }
960  } else if (count < 0) {
961  // leftN shifts left by N bytes.
962  const vector unsigned char left2 = width == 32 ?
963  (vector unsigned char)(2,3,22,22, 6,7,0,1, 10,11,4,5, 14,15,8,9) :
964  (vector unsigned char)(2,3,4,5,6,7,30,30, 10,11,12,13,14,15,0,1),
965  left4 = width == 32 ?
966  (vector unsigned char)(20,20,20,20, 0,1,2,3, 4,5,6,7, 8,9,10,11) :
967  (vector unsigned char)(4,5,6,7,28,28,28,28, 12,13,14,15,0,1,2,3),
968  left8 = (vector unsigned char)(24,24,24,24,24,24,24,24,0,1,2,3,4,5,6,7);
969  for (; count; ++count) {
970  unsigned i = N128;
971  for (; i + M128 > N128;) {
972  --i; SFMT19937_REVSTEP128(i, i + M128 - N128, i - 2, i - 1);
973  }
974  for (; i > 2;) {
975  --i; SFMT19937_REVSTEP128(i, i + M128, i - 2, i - 1);
976  }
977  SFMT19937_REVSTEP128(1, M128 + 1, N128 - 1, 0 ); // i = 1
978  SFMT19937_REVSTEP128(0, M128 , N128 - 2, N128 - 1); // i = 0
979  }
980  }
981  }
982 
983 #undef SFMT19937_STEP128
984 #undef SFMT19937_REVSTEP128
985 #undef ALTIVEC_PERM
986 
987 #else // neither HAVE_SSE2 or HAVE_ALTIVEC
988 
989 #define SFMT19937_STEP32(I, J) { \
990  internal_type t = statev[I] ^ statev[I] << 8 ^ \
991  (statev[J] >> 11 & magic0) ^ \
992  (s0 >> 8 | s1 << 24) ^ r0 << 18; \
993  s0 = r0; r0 = t & mask; \
994  t = statev[I + 1] ^ \
995  (statev[I + 1] << 8 | statev[I] >> 24) ^ \
996  (statev[J + 1] >> 11 & magic1) ^ \
997  (s1 >> 8 | s2 << 24) ^ r1 << 18; \
998  s1 = r1; r1 = t & mask; \
999  t = statev[I + 2] ^ \
1000  (statev[I + 2] << 8 | statev[I + 1] >> 24) ^ \
1001  (statev[J + 2] >> 11 & magic2) ^ \
1002  (s2 >> 8 | s3 << 24) ^ r2 << 18; \
1003  s2 = r2; r2 = t & mask; \
1004  t = statev[I + 3] ^ \
1005  (statev[I + 3] << 8 | statev[I + 2] >> 24) ^ \
1006  (statev[J + 3] >> 11 & magic3) ^ s3 >> 8 ^ r3 << 18; \
1007  s3 = r3; r3 = t & mask; \
1008  statev[I ] = r0; statev[I + 1] = r1; \
1009  statev[I + 2] = r2; statev[I + 3] = r3; \
1010  }
1011 
1012 #define SFMT19937_REVSTEP32(I, J, K, L) { \
1013  internal_type \
1014  t0 = (statev[I] ^ (statev[J] >> 11 & magic0) ^ \
1015  (statev[K] >> 8 | statev[K + 1] << 24) ^ \
1016  statev[L] << 18) & mask, \
1017  t1 = (statev[I + 1] ^ \
1018  (statev[J + 1] >> 11 & magic1) ^ \
1019  (statev[K + 1] >> 8 | statev[K + 2] << 24) ^ \
1020  statev[L + 1] << 18) & mask, \
1021  t2 = (statev[I + 2] ^ \
1022  (statev[J + 2] >> 11 & magic2) ^ \
1023  (statev[K + 2] >> 8 | statev[K + 3] << 24) ^ \
1024  statev[L + 2] << 18) & mask, \
1025  t3 = (statev[I + 3] ^ \
1026  (statev[J + 3] >> 11 & magic3) ^ \
1027  statev[K + 3] >> 8 ^ \
1028  statev[L + 3] << 18) & mask; \
1029  t3 ^= t1; t2 ^= t0; t3 ^= t2; t2 ^= t1; t1 ^= t0; \
1030  t3 ^= t2 >> 16 | (t3 << 16 & mask); \
1031  t2 ^= t1 >> 16 | (t2 << 16 & mask); \
1032  t1 ^= t0 >> 16 | (t1 << 16 & mask); \
1033  t0 ^= t0 << 16 & mask; \
1034  statev[I ] = t0 ^ (t0 << 8 & mask); \
1035  statev[I + 1] = t1 ^ (t0 >> 24 | (t1 << 8 & mask)); \
1036  statev[I + 2] = t2 ^ (t1 >> 24 | (t2 << 8 & mask)); \
1037  statev[I + 3] = t3 ^ (t2 >> 24 | (t3 << 8 & mask)); \
1038  }
1039 
1040  template<>
1042  internal_type statev[])
1043  throw() {
1044  if (count > 0) {
1045  // x[i+N] = g(x[i], x[i+M], x[i+N-2], x[i,N-1])
1047  s0 = statev[N - 8], s1 = statev[N - 7],
1048  s2 = statev[N - 6], s3 = statev[N - 5],
1049  r0 = statev[N - 4], r1 = statev[N - 3],
1050  r2 = statev[N - 2], r3 = statev[N - 1];
1051  for (; count; --count) {
1052  unsigned i = 0;
1053  for (; i + M < N; i += R) SFMT19937_STEP32(i, i + M );
1054  for (; i < N ; i += R) SFMT19937_STEP32(i, i + M - N);
1055  }
1056  } else if (count < 0)
1057  for (; count; ++count) {
1058  unsigned i = N;
1059  for (; i + M > N;) {
1060  i -= R; SFMT19937_REVSTEP32(i, i + M - N, i - 2 * R, i - R);
1061  }
1062  for (; i > 2 * R;) {
1063  i -= R; SFMT19937_REVSTEP32(i, i + M , i - 2 * R, i - R);
1064  }
1065  SFMT19937_REVSTEP32(R, M + R, N - R, 0 ); // i = R
1066  SFMT19937_REVSTEP32(0, M , N - 2 * R, N - R); // i = 0
1067  }
1068  }
1069 
1070 #undef SFMT19937_STEP32
1071 #undef SFMT19937_REVSTEP32
1072 
1073 #define SFMT19937_STEP64(I, J) { \
1074  internal_type t = statev[I] ^ statev[I] << 8 ^ \
1075  (statev[J] >> 11 & magic0) ^ \
1076  (s0 >> 8 | s1 << 56) ^ (r0 << 18 & mask18); \
1077  s0 = r0; r0 = t & mask; \
1078  t = statev[I + 1] ^ \
1079  (statev[I + 1] << 8 | statev[I] >> 56) ^ \
1080  (statev[J + 1] >> 11 & magic1) ^ \
1081  s1 >> 8 ^ (r1 << 18 & mask18); \
1082  s1 = r1; r1 = t & mask; \
1083  statev[I] = r0; statev[I + 1] = r1; \
1084  }
1085 
1086  // In combining the left and right shifts to simulate a 128-bit shift we
1087  // usually use or. However we can equivalently use xor (e.g., t1 << 8 ^ t0
1088  // >> 56 instead of t1 ^ t1 << 8 | t0 >> 56) and this speeds up the code if
1089  // used in some places.
1090 
1091 #define SFMT19937_REVSTEP64(I, J, K, L) { \
1092  internal_type \
1093  t0 = statev[I] ^ (statev[J] >> 11 & magic0) ^ \
1094  (statev[K] >> 8 | (statev[K + 1] << 56 & mask)) ^ \
1095  (statev[L] << 18 & mask18), \
1096  t1 = statev[I + 1] ^ (statev[J + 1] >> 11 & magic1) ^ \
1097  statev[K + 1] >> 8 ^ (statev[L + 1] << 18 & mask18); \
1098  t1 ^= t0; \
1099  t1 ^= t0 >> 32 ^ (t1 << 32 & mask); \
1100  t0 ^= t0 << 32 & mask; \
1101  t1 ^= t0 >> 48 ^ (t1 << 16 & mask); \
1102  t0 ^= t0 << 16 & mask; \
1103  statev[I ] = t0 ^ (t0 << 8 & mask); \
1104  statev[I + 1] = t1 ^ t0 >> 56 ^ (t1 << 8 & mask); \
1105  }
1106 
1107  template<>
1109  internal_type statev[])
1110  throw() {
1111  // x[i+N] = g(x[i], x[i+M], x[i+N-2], x[i,N-1])
1112  if (count > 0) {
1114  s0 = statev[N - 4], s1 = statev[N - 3],
1115  r0 = statev[N - 2], r1 = statev[N - 1];
1116  for (; count; --count) {
1117  unsigned i = 0;
1118  for (; i + M < N; i += R) SFMT19937_STEP64(i, i + M );
1119  for (; i < N ; i += R) SFMT19937_STEP64(i, i + M - N);
1120  }
1121  } else if (count < 0)
1122  for (; count; ++count) {
1123  unsigned i = N;
1124  for (; i + M > N;) {
1125  i -= R; SFMT19937_REVSTEP64(i, i + M - N, i - 2 * R, i - R);
1126  }
1127  for (; i > 2 * R;) {
1128  i -= R; SFMT19937_REVSTEP64(i, i + M , i - 2 * R, i - R);
1129  }
1130  SFMT19937_REVSTEP64(R, M + R, N - R, 0 ); // i = R
1131  SFMT19937_REVSTEP64(0, M , N - 2 * R, N - R); // i = 0
1132  }
1133  }
1134 
1135 #undef SFMT19937_STEP64
1136 #undef SFMT19937_REVSTEP64
1137 
1138 #endif // HAVE_SSE2 and HAVE_ALTIVEC
1139 
1140  template<>
1141  void SFMT19937<Random_u32>::NormalizeState(engine_type state[]) throw() {
1142  // Carry out the Period Certification for SFMT19937
1143  engine_type inner = (state[0] & PARITY0) ^ (state[1] & PARITY1) ^
1144  (state[2] & PARITY2) ^ (state[3] & PARITY3);
1145  for (unsigned s = 16; s; s >>= 1)
1146  inner ^= inner >> s;
1147  STATIC_ASSERT(PARITY_LSB < 32 && PARITY0 & 1u << PARITY_LSB,
1148  "inconsistent PARITY_LSB or PARITY0");
1149  // Now inner & 1 is the parity of the number of 1 bits in w_0 & p.
1150  if ((inner & 1u) == 0)
1151  // Change bit of w_0 corresponding to LSB of PARITY
1152  state[PARITY_LSB >> 5] ^= engine_type(1u) << (PARITY_LSB & 31u);
1153  }
1154 
1155  template<>
1156  void SFMT19937<Random_u64>::NormalizeState(engine_type state[]) throw() {
1157  // Carry out the Period Certification for SFMT19937
1158  engine_type inner = (state[0] & PARITY0) ^ (state[1] & PARITY1);
1159  for (unsigned s = 32; s; s >>= 1)
1160  inner ^= inner >> s;
1161  STATIC_ASSERT(PARITY_LSB < 64 && PARITY0 & 1u << PARITY_LSB,
1162  "inconsistent PARITY_LSB or PARITY0");
1163  // Now inner & 1 is the parity of the number of 1 bits in w_0 & p.
1164  if ((inner & 1u) == 0)
1165  // Change bit of w_0 corresponding to LSB of PARITY
1166  state[PARITY_LSB >> 6] ^= engine_type(1u) << (PARITY_LSB & 63u);
1167  }
1168 
1169  template<class RandomType>
1170  void SFMT19937<RandomType>::CheckState(const engine_type state[],
1171  Random_u32::type& check) {
1172  engine_type x = 0;
1173  Random_u32::type c = check;
1174  for (unsigned i = 0; i < N; ++i) {
1175  engine_t::CheckSum(state[i], c);
1176  x |= state[i];
1177  }
1178  if (x == 0)
1179  throw RandomErr("SFMT19937: All-zero state");
1180  check = c;
1181  }
1182 
1183  // RandomPower2 implementation
1184 
1185 #if RANDOMLIB_POWERTABLE
1186  // Powers of two. Just use floats here. As long as there's no overflow
1187  // or underflow these are exact. In particular they can be cast to
1188  // doubles or long doubles with no error.
1189  const float RandomPower2::power2[maxpow - minpow + 1] = {
1190 #if RANDOMLIB_LONGDOUBLEPREC > 64
1191  // It would be nice to be able to use the C99 notation of 0x1.0p-120
1192  // for 2^-120 here.
1193  1/1329227995784915872903807060280344576.f, // 2^-120
1194  1/664613997892457936451903530140172288.f, // 2^-119
1195  1/332306998946228968225951765070086144.f, // 2^-118
1196  1/166153499473114484112975882535043072.f, // 2^-117
1197  1/83076749736557242056487941267521536.f, // 2^-116
1198  1/41538374868278621028243970633760768.f, // 2^-115
1199  1/20769187434139310514121985316880384.f, // 2^-114
1200  1/10384593717069655257060992658440192.f, // 2^-113
1201  1/5192296858534827628530496329220096.f, // 2^-112
1202  1/2596148429267413814265248164610048.f, // 2^-111
1203  1/1298074214633706907132624082305024.f, // 2^-110
1204  1/649037107316853453566312041152512.f, // 2^-109
1205  1/324518553658426726783156020576256.f, // 2^-108
1206  1/162259276829213363391578010288128.f, // 2^-107
1207  1/81129638414606681695789005144064.f, // 2^-106
1208  1/40564819207303340847894502572032.f, // 2^-105
1209  1/20282409603651670423947251286016.f, // 2^-104
1210  1/10141204801825835211973625643008.f, // 2^-103
1211  1/5070602400912917605986812821504.f, // 2^-102
1212  1/2535301200456458802993406410752.f, // 2^-101
1213  1/1267650600228229401496703205376.f, // 2^-100
1214  1/633825300114114700748351602688.f, // 2^-99
1215  1/316912650057057350374175801344.f, // 2^-98
1216  1/158456325028528675187087900672.f, // 2^-97
1217  1/79228162514264337593543950336.f, // 2^-96
1218  1/39614081257132168796771975168.f, // 2^-95
1219  1/19807040628566084398385987584.f, // 2^-94
1220  1/9903520314283042199192993792.f, // 2^-93
1221  1/4951760157141521099596496896.f, // 2^-92
1222  1/2475880078570760549798248448.f, // 2^-91
1223  1/1237940039285380274899124224.f, // 2^-90
1224  1/618970019642690137449562112.f, // 2^-89
1225  1/309485009821345068724781056.f, // 2^-88
1226  1/154742504910672534362390528.f, // 2^-87
1227  1/77371252455336267181195264.f, // 2^-86
1228  1/38685626227668133590597632.f, // 2^-85
1229  1/19342813113834066795298816.f, // 2^-84
1230  1/9671406556917033397649408.f, // 2^-83
1231  1/4835703278458516698824704.f, // 2^-82
1232  1/2417851639229258349412352.f, // 2^-81
1233  1/1208925819614629174706176.f, // 2^-80
1234  1/604462909807314587353088.f, // 2^-79
1235  1/302231454903657293676544.f, // 2^-78
1236  1/151115727451828646838272.f, // 2^-77
1237  1/75557863725914323419136.f, // 2^-76
1238  1/37778931862957161709568.f, // 2^-75
1239  1/18889465931478580854784.f, // 2^-74
1240  1/9444732965739290427392.f, // 2^-73
1241  1/4722366482869645213696.f, // 2^-72
1242  1/2361183241434822606848.f, // 2^-71
1243  1/1180591620717411303424.f, // 2^-70
1244  1/590295810358705651712.f, // 2^-69
1245  1/295147905179352825856.f, // 2^-68
1246  1/147573952589676412928.f, // 2^-67
1247  1/73786976294838206464.f, // 2^-66
1248  1/36893488147419103232.f, // 2^-65
1249 #endif
1250  1/18446744073709551616.f, // 2^-64
1251  1/9223372036854775808.f, // 2^-63
1252  1/4611686018427387904.f, // 2^-62
1253  1/2305843009213693952.f, // 2^-61
1254  1/1152921504606846976.f, // 2^-60
1255  1/576460752303423488.f, // 2^-59
1256  1/288230376151711744.f, // 2^-58
1257  1/144115188075855872.f, // 2^-57
1258  1/72057594037927936.f, // 2^-56
1259  1/36028797018963968.f, // 2^-55
1260  1/18014398509481984.f, // 2^-54
1261  1/9007199254740992.f, // 2^-53
1262  1/4503599627370496.f, // 2^-52
1263  1/2251799813685248.f, // 2^-51
1264  1/1125899906842624.f, // 2^-50
1265  1/562949953421312.f, // 2^-49
1266  1/281474976710656.f, // 2^-48
1267  1/140737488355328.f, // 2^-47
1268  1/70368744177664.f, // 2^-46
1269  1/35184372088832.f, // 2^-45
1270  1/17592186044416.f, // 2^-44
1271  1/8796093022208.f, // 2^-43
1272  1/4398046511104.f, // 2^-42
1273  1/2199023255552.f, // 2^-41
1274  1/1099511627776.f, // 2^-40
1275  1/549755813888.f, // 2^-39
1276  1/274877906944.f, // 2^-38
1277  1/137438953472.f, // 2^-37
1278  1/68719476736.f, // 2^-36
1279  1/34359738368.f, // 2^-35
1280  1/17179869184.f, // 2^-34
1281  1/8589934592.f, // 2^-33
1282  1/4294967296.f, // 2^-32
1283  1/2147483648.f, // 2^-31
1284  1/1073741824.f, // 2^-30
1285  1/536870912.f, // 2^-29
1286  1/268435456.f, // 2^-28
1287  1/134217728.f, // 2^-27
1288  1/67108864.f, // 2^-26
1289  1/33554432.f, // 2^-25
1290  1/16777216.f, // 2^-24
1291  1/8388608.f, // 2^-23
1292  1/4194304.f, // 2^-22
1293  1/2097152.f, // 2^-21
1294  1/1048576.f, // 2^-20
1295  1/524288.f, // 2^-19
1296  1/262144.f, // 2^-18
1297  1/131072.f, // 2^-17
1298  1/65536.f, // 2^-16
1299  1/32768.f, // 2^-15
1300  1/16384.f, // 2^-14
1301  1/8192.f, // 2^-13
1302  1/4096.f, // 2^-12
1303  1/2048.f, // 2^-11
1304  1/1024.f, // 2^-10
1305  1/512.f, // 2^-9
1306  1/256.f, // 2^-8
1307  1/128.f, // 2^-7
1308  1/64.f, // 2^-6
1309  1/32.f, // 2^-5
1310  1/16.f, // 2^-4
1311  1/8.f, // 2^-3
1312  1/4.f, // 2^-2
1313  1/2.f, // 2^-1
1314  1.f, // 2^0
1315  2.f, // 2^1
1316  4.f, // 2^2
1317  8.f, // 2^3
1318  16.f, // 2^4
1319  32.f, // 2^5
1320  64.f, // 2^6
1321  128.f, // 2^7
1322  256.f, // 2^8
1323  512.f, // 2^9
1324  1024.f, // 2^10
1325  2048.f, // 2^11
1326  4096.f, // 2^12
1327  8192.f, // 2^13
1328  16384.f, // 2^14
1329  32768.f, // 2^15
1330  65536.f, // 2^16
1331  131072.f, // 2^17
1332  262144.f, // 2^18
1333  524288.f, // 2^19
1334  1048576.f, // 2^20
1335  2097152.f, // 2^21
1336  4194304.f, // 2^22
1337  8388608.f, // 2^23
1338  16777216.f, // 2^24
1339  33554432.f, // 2^25
1340  67108864.f, // 2^26
1341  134217728.f, // 2^27
1342  268435456.f, // 2^28
1343  536870912.f, // 2^29
1344  1073741824.f, // 2^30
1345  2147483648.f, // 2^31
1346  4294967296.f, // 2^32
1347  8589934592.f, // 2^33
1348  17179869184.f, // 2^34
1349  34359738368.f, // 2^35
1350  68719476736.f, // 2^36
1351  137438953472.f, // 2^37
1352  274877906944.f, // 2^38
1353  549755813888.f, // 2^39
1354  1099511627776.f, // 2^40
1355  2199023255552.f, // 2^41
1356  4398046511104.f, // 2^42
1357  8796093022208.f, // 2^43
1358  17592186044416.f, // 2^44
1359  35184372088832.f, // 2^45
1360  70368744177664.f, // 2^46
1361  140737488355328.f, // 2^47
1362  281474976710656.f, // 2^48
1363  562949953421312.f, // 2^49
1364  1125899906842624.f, // 2^50
1365  2251799813685248.f, // 2^51
1366  4503599627370496.f, // 2^52
1367  9007199254740992.f, // 2^53
1368  18014398509481984.f, // 2^54
1369  36028797018963968.f, // 2^55
1370  72057594037927936.f, // 2^56
1371  144115188075855872.f, // 2^57
1372  288230376151711744.f, // 2^58
1373  576460752303423488.f, // 2^59
1374  1152921504606846976.f, // 2^60
1375  2305843009213693952.f, // 2^61
1376  4611686018427387904.f, // 2^62
1377  9223372036854775808.f, // 2^63
1378  18446744073709551616.f, // 2^64
1379  };
1380 #endif
1381 
1382  // RandomEngine (and implicitly RandomAlgorithm and RandomMixer)
1383  // instantiations. The first 4 (using MixerMT[01]) are not recommended.
1384  template class RandomEngine< MT19937<Random_u32>, MixerMT0<Random_u32> >;
1385  template class RandomEngine< MT19937<Random_u64>, MixerMT0<Random_u64> >;
1386  template class RandomEngine< MT19937<Random_u32>, MixerMT1<Random_u32> >;
1387  template class RandomEngine< MT19937<Random_u64>, MixerMT1<Random_u64> >;
1388 
1389  template class RandomEngine< MT19937<Random_u32>, MixerSFMT>;
1390  template class RandomEngine< MT19937<Random_u64>, MixerSFMT>;
1391  template class RandomEngine<SFMT19937<Random_u32>, MixerSFMT>;
1392  template class RandomEngine<SFMT19937<Random_u64>, MixerSFMT>;
1393 
1394  // RandomCanonial instantiations
1395 
1396  template<> RandomCanonical<MRandomGenerator32>
1404 
1405 } // namespace RandomLib
static std::vector< seed_type > SeedVector()
Definition: Random.cpp:184
static void SeedToState(const std::vector< RandomSeed::seed_type > &seed, mixer_type state[], unsigned n)
Definition: Random.cpp:523
Generate random integers, reals, and booleans.
result_t::type result_type
void Save(std::ostream &os, bool bin=true) const
Definition: Random.cpp:375
Header for Random, RandomGenerator.
engine_t::type internal_type
#define SFMT19937_REVSTEP32(I, J, K, L)
Definition: Random.cpp:1012
static const type mask
Definition: RandomType.hpp:45
static void NormalizeState(engine_type state[])
Definition: Random.cpp:714
void StepCount(long long n)
Definition: Random.cpp:397
The original MT19937 mixing functionality.
Definition: RandomMixer.hpp:59
static void SeedToState(const std::vector< RandomSeed::seed_type > &seed, mixer_type state[], unsigned n)
Definition: Random.cpp:565
void Load(std::istream &is, bool bin=true)
#define SFMT19937_STEP64(I, J)
Definition: Random.cpp:1073
Exception handling for RandomLib.
Definition: Random.hpp:103
static std::vector< seed_type > StringToVector(const std::string &s)
Definition: Random.cpp:227
static void Read32(std::istream &is, bool bin, type &x)
Definition: Random.cpp:125
#define STATIC_ASSERT(cond, reason)
Definition: MPFRRandom.hpp:31
void SetCount(long long n)
seed_t::type seed_type
Definition: RandomSeed.hpp:73
static void SeedToState(const std::vector< RandomSeed::seed_type > &seed, mixer_type state[], unsigned n)
Definition: Random.cpp:606
static void Transition(long long count, internal_type statev[])
static void CheckState(const engine_type state[], Random_u32::type &check)
Definition: Random.cpp:1170
void Reseed(const std::vector< IntType > &v)
Definition: RandomSeed.hpp:86
static void NormalizeState(engine_type state[])
static seed_type SeedWord()
Definition: Random.cpp:161
static void CheckState(const engine_type state[], Random_u32::type &check)
Definition: Random.cpp:735
static void CheckSum(type n, uint32_t &check)
#define MT19937_STEP(I, J, K)
Definition: Random.cpp:668
static void Transition(long long count, internal_type statev[])
Definition: Random.cpp:687
#define MT19937_REVSTEP(I, J, K)
Definition: Random.cpp:679
Uniform random number generator.
static type cast(IntType x)
Definition: RandomType.hpp:62
#define SFMT19937_STEP32(I, J)
Definition: Random.cpp:989
std::string SeedString() const
Definition: RandomSeed.hpp:153
#define SFMT19937_REVSTEP64(I, J, K, L)
Definition: Random.cpp:1091
engine_t::type internal_type
static void SelfTest()
Definition: Random.cpp:421
static void Write32(std::ostream &os, bool bin, int &cnt, type x)
Definition: Random.cpp:99