diff --git a/Cargo.lock b/Cargo.lock index e9bdd29606..0b740e8158 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1245,15 +1245,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "rand_distr" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96977acbdd3a6576fb1d27391900035bf3863d4a16422973a409b488cf29ffb2" -dependencies = [ - "rand 0.7.3", -] - [[package]] name = "rand_hc" version = "0.1.0" @@ -1539,7 +1530,7 @@ dependencies = [ "paste", "pwd", "rand 0.7.3", - "rand_distr", + "rand_core 0.5.1", "regex", "result-like", "rustc_version_runtime", diff --git a/Lib/random.py b/Lib/random.py new file mode 100644 index 0000000000..61e881642c --- /dev/null +++ b/Lib/random.py @@ -0,0 +1,772 @@ +"""Random variable generators. + + integers + -------- + uniform within range + + sequences + --------- + pick random element + pick random sample + pick weighted random sample + generate random permutation + + distributions on the real line: + ------------------------------ + uniform + triangular + normal (Gaussian) + lognormal + negative exponential + gamma + beta + pareto + Weibull + + distributions on the circle (angles 0 to 2pi) + --------------------------------------------- + circular uniform + von Mises + +General notes on the underlying Mersenne Twister core generator: + +* The period is 2**19937-1. +* It is one of the most extensively tested generators in existence. +* The random() method is implemented in C, executes in a single Python step, + and is, therefore, threadsafe. 
+ +""" + +from warnings import warn as _warn +from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType +from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil +from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin +from os import urandom as _urandom +from _collections_abc import Set as _Set, Sequence as _Sequence +from hashlib import sha512 as _sha512 +import itertools as _itertools +import bisect as _bisect + +__all__ = ["Random","seed","random","uniform","randint","choice","sample", + "randrange","shuffle","normalvariate","lognormvariate", + "expovariate","vonmisesvariate","gammavariate","triangular", + "gauss","betavariate","paretovariate","weibullvariate", + "getstate","setstate", "getrandbits", "choices", + "SystemRandom"] + +NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) +TWOPI = 2.0*_pi +LOG4 = _log(4.0) +SG_MAGICCONST = 1.0 + _log(4.5) +BPF = 53 # Number of bits in a float +RECIP_BPF = 2**-BPF + + +# Translated by Guido van Rossum from C source provided by +# Adrian Baddeley. Adapted by Raymond Hettinger for use with +# the Mersenne Twister and os.urandom() core generators. + +import _random + +class Random(_random.Random): + """Random number generator base class used by bound module functions. + + Used to instantiate instances of Random to get generators that don't + share state. + + Class Random can also be subclassed if you want to use a different basic + generator of your own devising: in that case, override the following + methods: random(), seed(), getstate(), and setstate(). + Optionally, implement a getrandbits() method so that randrange() + can cover arbitrarily large ranges. + + """ + + VERSION = 3 # used by getstate/setstate + + def __init__(self, x=None): + """Initialize an instance. + + Optional argument x controls seeding, as for Random.seed(). + """ + + self.seed(x) + self.gauss_next = None + + def seed(self, a=None, version=2): + """Initialize internal state from hashable object. 
+ + None or no argument seeds from current time or from an operating + system specific randomness source if available. + + If *a* is an int, all bits are used. + + For version 2 (the default), all of the bits are used if *a* is a str, + bytes, or bytearray. For version 1 (provided for reproducing random + sequences from older versions of Python), the algorithm for str and + bytes generates a narrower range of seeds. + + """ + + if version == 1 and isinstance(a, (str, bytes)): + a = a.decode('latin-1') if isinstance(a, bytes) else a + x = ord(a[0]) << 7 if a else 0 + for c in map(ord, a): + x = ((1000003 * x) ^ c) & 0xFFFFFFFFFFFFFFFF + x ^= len(a) + a = -2 if x == -1 else x + + if version == 2 and isinstance(a, (str, bytes, bytearray)): + if isinstance(a, str): + a = a.encode() + a += _sha512(a).digest() + a = int.from_bytes(a, 'big') + + super().seed(a) + self.gauss_next = None + + def getstate(self): + """Return internal state; can be passed to setstate() later.""" + return self.VERSION, super().getstate(), self.gauss_next + + def setstate(self, state): + """Restore internal state from object returned by getstate().""" + version = state[0] + if version == 3: + version, internalstate, self.gauss_next = state + super().setstate(internalstate) + elif version == 2: + version, internalstate, self.gauss_next = state + # In version 2, the state was saved as signed ints, which causes + # inconsistencies between 32/64-bit systems. The state is + # really unsigned 32-bit ints, so we convert negative ints from + # version 2 to positive longs for version 3. + try: + internalstate = tuple(x % (2**32) for x in internalstate) + except ValueError as e: + raise TypeError from e + super().setstate(internalstate) + else: + raise ValueError("state with version %s passed to " + "Random.setstate() of version %s" % + (version, self.VERSION)) + +## ---- Methods below this point do not need to be overridden when +## ---- subclassing for the purpose of using a different core generator. 
+ +## -------------------- pickle support ------------------- + + # Issue 17489: Since __reduce__ was defined to fix #759889 this is no + # longer called; we leave it here because it has been here since random was + # rewritten back in 2001 and why risk breaking something. + def __getstate__(self): # for pickle + return self.getstate() + + def __setstate__(self, state): # for pickle + self.setstate(state) + + def __reduce__(self): + return self.__class__, (), self.getstate() + +## -------------------- integer methods ------------------- + + def randrange(self, start, stop=None, step=1, _int=int): + """Choose a random item from range(start, stop[, step]). + + This fixes the problem with randint() which includes the + endpoint; in Python this is usually not what you want. + + """ + + # This code is a bit messy to make it fast for the + # common case while still doing adequate error checking. + istart = _int(start) + if istart != start: + raise ValueError("non-integer arg 1 for randrange()") + if stop is None: + if istart > 0: + return self._randbelow(istart) + raise ValueError("empty range for randrange()") + + # stop argument supplied. + istop = _int(stop) + if istop != stop: + raise ValueError("non-integer stop for randrange()") + width = istop - istart + if step == 1 and width > 0: + return istart + self._randbelow(width) + if step == 1: + raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width)) + + # Non-unit step argument supplied. + istep = _int(step) + if istep != step: + raise ValueError("non-integer step for randrange()") + if istep > 0: + n = (width + istep - 1) // istep + elif istep < 0: + n = (width + istep + 1) // istep + else: + raise ValueError("zero step for randrange()") + + if n <= 0: + raise ValueError("empty range for randrange()") + + return istart + istep*self._randbelow(n) + + def randint(self, a, b): + """Return random integer in range [a, b], including both end points. 
+ """ + + return self.randrange(a, b+1) + + def _randbelow(self, n, int=int, maxsize=1<= n: + r = getrandbits(k) + return r + # There's an overridden random() method but no new getrandbits() method, + # so we can only use random() from here. + if n >= maxsize: + _warn("Underlying random() generator does not supply \n" + "enough bits to choose from a population range this large.\n" + "To remove the range limitation, add a getrandbits() method.") + return int(random() * n) + if n == 0: + raise ValueError("Boundary cannot be zero") + rem = maxsize % n + limit = (maxsize - rem) / maxsize # int(limit * maxsize) % n == 0 + r = random() + while r >= limit: + r = random() + return int(r*maxsize) % n + +## -------------------- sequence methods ------------------- + + def choice(self, seq): + """Choose a random element from a non-empty sequence.""" + try: + i = self._randbelow(len(seq)) + except ValueError: + raise IndexError('Cannot choose from an empty sequence') from None + return seq[i] + + def shuffle(self, x, random=None): + """Shuffle list x in place, and return None. + + Optional argument random is a 0-argument function returning a + random float in [0.0, 1.0); if it is the default None, the + standard random.random will be used. + + """ + + if random is None: + randbelow = self._randbelow + for i in reversed(range(1, len(x))): + # pick an element in x[:i+1] with which to exchange x[i] + j = randbelow(i+1) + x[i], x[j] = x[j], x[i] + else: + _int = int + for i in reversed(range(1, len(x))): + # pick an element in x[:i+1] with which to exchange x[i] + j = _int(random() * (i+1)) + x[i], x[j] = x[j], x[i] + + def sample(self, population, k): + """Chooses k unique random elements from a population sequence or set. + + Returns a new list containing elements from the population while + leaving the original population unchanged. The resulting list is + in selection order so that all sub-slices will also be valid random + samples. 
This allows raffle winners (the sample) to be partitioned + into grand prize and second place winners (the subslices). + + Members of the population need not be hashable or unique. If the + population contains repeats, then each occurrence is a possible + selection in the sample. + + To choose a sample in a range of integers, use range as an argument. + This is especially fast and space efficient for sampling from a + large population: sample(range(10000000), 60) + """ + + # Sampling without replacement entails tracking either potential + # selections (the pool) in a list or previous selections in a set. + + # When the number of selections is small compared to the + # population, then tracking selections is efficient, requiring + # only a small set and an occasional reselection. For + # a larger number of selections, the pool tracking method is + # preferred since the list takes less space than the + # set and it doesn't suffer from frequent reselections. + + if isinstance(population, _Set): + population = tuple(population) + if not isinstance(population, _Sequence): + raise TypeError("Population must be a sequence or set. 
For dicts, use list(d).") + randbelow = self._randbelow + n = len(population) + if not 0 <= k <= n: + raise ValueError("Sample larger than population or is negative") + result = [None] * k + setsize = 21 # size of a small set minus size of an empty list + if k > 5: + setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets + if n <= setsize: + # An n-length list is smaller than a k-length set + pool = list(population) + for i in range(k): # invariant: non-selected at [0,n-i) + j = randbelow(n-i) + result[i] = pool[j] + pool[j] = pool[n-i-1] # move non-selected item into vacancy + else: + selected = set() + selected_add = selected.add + for i in range(k): + j = randbelow(n) + while j in selected: + j = randbelow(n) + selected_add(j) + result[i] = population[j] + return result + + def choices(self, population, weights=None, *, cum_weights=None, k=1): + """Return a k sized list of population elements chosen with replacement. + + If the relative weights or cumulative weights are not specified, + the selections are made with equal probability. + + """ + random = self.random + if cum_weights is None: + if weights is None: + _int = int + total = len(population) + return [population[_int(random() * total)] for i in range(k)] + cum_weights = list(_itertools.accumulate(weights)) + elif weights is not None: + raise TypeError('Cannot specify both weights and cumulative weights') + if len(cum_weights) != len(population): + raise ValueError('The number of weights does not match the population') + bisect = _bisect.bisect + total = cum_weights[-1] + hi = len(cum_weights) - 1 + return [population[bisect(cum_weights, random() * total, 0, hi)] + for i in range(k)] + +## -------------------- real-valued distributions ------------------- + +## -------------------- uniform distribution ------------------- + + def uniform(self, a, b): + "Get a random number in the range [a, b) or [a, b] depending on rounding." 
+ return a + (b-a) * self.random() + +## -------------------- triangular -------------------- + + def triangular(self, low=0.0, high=1.0, mode=None): + """Triangular distribution. + + Continuous distribution bounded by given lower and upper limits, + and having a given mode value in-between. + + http://en.wikipedia.org/wiki/Triangular_distribution + + """ + u = self.random() + try: + c = 0.5 if mode is None else (mode - low) / (high - low) + except ZeroDivisionError: + return low + if u > c: + u = 1.0 - u + c = 1.0 - c + low, high = high, low + return low + (high - low) * (u * c) ** 0.5 + +## -------------------- normal distribution -------------------- + + def normalvariate(self, mu, sigma): + """Normal distribution. + + mu is the mean, and sigma is the standard deviation. + + """ + # mu = mean, sigma = standard deviation + + # Uses Kinderman and Monahan method. Reference: Kinderman, + # A.J. and Monahan, J.F., "Computer generation of random + # variables using the ratio of uniform deviates", ACM Trans + # Math Software, 3, (1977), pp257-260. + + random = self.random + while 1: + u1 = random() + u2 = 1.0 - random() + z = NV_MAGICCONST*(u1-0.5)/u2 + zz = z*z/4.0 + if zz <= -_log(u2): + break + return mu + z*sigma + +## -------------------- lognormal distribution -------------------- + + def lognormvariate(self, mu, sigma): + """Log normal distribution. + + If you take the natural logarithm of this distribution, you'll get a + normal distribution with mean mu and standard deviation sigma. + mu can have any value, and sigma must be greater than zero. + + """ + return _exp(self.normalvariate(mu, sigma)) + +## -------------------- exponential distribution -------------------- + + def expovariate(self, lambd): + """Exponential distribution. + + lambd is 1.0 divided by the desired mean. It should be + nonzero. (The parameter would be called "lambda", but that is + a reserved word in Python.) 
Returned values range from 0 to + positive infinity if lambd is positive, and from negative + infinity to 0 if lambd is negative. + + """ + # lambd: rate lambd = 1/mean + # ('lambda' is a Python reserved word) + + # we use 1-random() instead of random() to preclude the + # possibility of taking the log of zero. + return -_log(1.0 - self.random())/lambd + +## -------------------- von Mises distribution -------------------- + + def vonmisesvariate(self, mu, kappa): + """Circular data distribution. + + mu is the mean angle, expressed in radians between 0 and 2*pi, and + kappa is the concentration parameter, which must be greater than or + equal to zero. If kappa is equal to zero, this distribution reduces + to a uniform random angle over the range 0 to 2*pi. + + """ + # mu: mean angle (in radians between 0 and 2*pi) + # kappa: concentration parameter kappa (>= 0) + # if kappa = 0 generate uniform random angle + + # Based upon an algorithm published in: Fisher, N.I., + # "Statistical Analysis of Circular Data", Cambridge + # University Press, 1993. + + # Thanks to Magnus Kessler for a correction to the + # implementation of step 4. + + random = self.random + if kappa <= 1e-6: + return TWOPI * random() + + s = 0.5 / kappa + r = s + _sqrt(1.0 + s * s) + + while 1: + u1 = random() + z = _cos(_pi * u1) + + d = z / (r + z) + u2 = random() + if u2 < 1.0 - d * d or u2 <= (1.0 - d) * _exp(d): + break + + q = 1.0 / r + f = (q + z) / (1.0 + q * z) + u3 = random() + if u3 > 0.5: + theta = (mu + _acos(f)) % TWOPI + else: + theta = (mu - _acos(f)) % TWOPI + + return theta + +## -------------------- gamma distribution -------------------- + + def gammavariate(self, alpha, beta): + """Gamma distribution. Not the gamma function! + + Conditions on the parameters are alpha > 0 and beta > 0. 
+ + The probability distribution function is: + + x ** (alpha - 1) * math.exp(-x / beta) + pdf(x) = -------------------------------------- + math.gamma(alpha) * beta ** alpha + + """ + + # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 + + # Warning: a few older sources define the gamma distribution in terms + # of alpha > -1.0 + if alpha <= 0.0 or beta <= 0.0: + raise ValueError('gammavariate: alpha and beta must be > 0.0') + + random = self.random + if alpha > 1.0: + + # Uses R.C.H. Cheng, "The generation of Gamma + # variables with non-integral shape parameters", + # Applied Statistics, (1977), 26, No. 1, p71-74 + + ainv = _sqrt(2.0 * alpha - 1.0) + bbb = alpha - LOG4 + ccc = alpha + ainv + + while 1: + u1 = random() + if not 1e-7 < u1 < .9999999: + continue + u2 = 1.0 - random() + v = _log(u1/(1.0-u1))/ainv + x = alpha*_exp(v) + z = u1*u1*u2 + r = bbb+ccc*v-x + if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z): + return x * beta + + elif alpha == 1.0: + # expovariate(1) + u = random() + while u <= 1e-7: + u = random() + return -_log(u) * beta + + else: # alpha is between 0 and 1 (exclusive) + + # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle + + while 1: + u = random() + b = (_e + alpha)/_e + p = b*u + if p <= 1.0: + x = p ** (1.0/alpha) + else: + x = -_log((b-p)/alpha) + u1 = random() + if p > 1.0: + if u1 <= x ** (alpha - 1.0): + break + elif u1 <= _exp(-x): + break + return x * beta + +## -------------------- Gauss (faster alternative) -------------------- + + def gauss(self, mu, sigma): + """Gaussian distribution. + + mu is the mean, and sigma is the standard deviation. This is + slightly faster than the normalvariate() function. + + Not thread-safe without a lock around calls. 
+ + """ + + # When x and y are two variables from [0, 1), uniformly + # distributed, then + # + # cos(2*pi*x)*sqrt(-2*log(1-y)) + # sin(2*pi*x)*sqrt(-2*log(1-y)) + # + # are two *independent* variables with normal distribution + # (mu = 0, sigma = 1). + # (Lambert Meertens) + # (corrected version; bug discovered by Mike Miller, fixed by LM) + + # Multithreading note: When two threads call this function + # simultaneously, it is possible that they will receive the + # same return value. The window is very small though. To + # avoid this, you have to use a lock around all calls. (I + # didn't want to slow this down in the serial case by using a + # lock here.) + + random = self.random + z = self.gauss_next + self.gauss_next = None + if z is None: + x2pi = random() * TWOPI + g2rad = _sqrt(-2.0 * _log(1.0 - random())) + z = _cos(x2pi) * g2rad + self.gauss_next = _sin(x2pi) * g2rad + + return mu + z*sigma + +## -------------------- beta -------------------- +## See +## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html +## for Ivan Frohne's insightful analysis of why the original implementation: +## +## def betavariate(self, alpha, beta): +## # Discrete Event Simulation in C, pp 87-88. +## +## y = self.expovariate(alpha) +## z = self.expovariate(1.0/beta) +## return z/(y+z) +## +## was dead wrong, and how it probably got that way. + + def betavariate(self, alpha, beta): + """Beta distribution. + + Conditions on the parameters are alpha > 0 and beta > 0. + Returned values range between 0 and 1. + + """ + + # This version due to Janne Sinkkonen, and matches all the std + # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution"). + y = self.gammavariate(alpha, 1.0) + if y == 0: + return 0.0 + else: + return y / (y + self.gammavariate(beta, 1.0)) + +## -------------------- Pareto -------------------- + + def paretovariate(self, alpha): + """Pareto distribution. alpha is the shape parameter.""" + # Jain, pg. 
495 + + u = 1.0 - self.random() + return 1.0 / u ** (1.0/alpha) + +## -------------------- Weibull -------------------- + + def weibullvariate(self, alpha, beta): + """Weibull distribution. + + alpha is the scale parameter and beta is the shape parameter. + + """ + # Jain, pg. 499; bug fix courtesy Bill Arms + + u = 1.0 - self.random() + return alpha * (-_log(u)) ** (1.0/beta) + +## --------------- Operating System Random Source ------------------ + +class SystemRandom(Random): + """Alternate random number generator using sources provided + by the operating system (such as /dev/urandom on Unix or + CryptGenRandom on Windows). + + Not available on all systems (see os.urandom() for details). + """ + + def random(self): + """Get the next random number in the range [0.0, 1.0).""" + return (int.from_bytes(_urandom(7), 'big') >> 3) * RECIP_BPF + + def getrandbits(self, k): + """getrandbits(k) -> x. Generates an int with k random bits.""" + if k <= 0: + raise ValueError('number of bits must be greater than zero') + if k != int(k): + raise TypeError('number of bits should be an integer') + numbytes = (k + 7) // 8 # bits / 8 and rounded up + x = int.from_bytes(_urandom(numbytes), 'big') + return x >> (numbytes * 8 - k) # trim excess bits + + def seed(self, *args, **kwds): + "Stub method. Not used for a system random number generator." + return None + + def _notimplemented(self, *args, **kwds): + "Method should not be called for a system random number generator." 
+ raise NotImplementedError('System entropy source does not have state.') + getstate = setstate = _notimplemented + +## -------------------- test program -------------------- + +def _test_generator(n, func, args): + import time + print(n, 'times', func.__name__) + total = 0.0 + sqsum = 0.0 + smallest = 1e10 + largest = -1e10 + t0 = time.time() + for i in range(n): + x = func(*args) + total += x + sqsum = sqsum + x*x + smallest = min(x, smallest) + largest = max(x, largest) + t1 = time.time() + print(round(t1-t0, 3), 'sec,', end=' ') + avg = total/n + stddev = _sqrt(sqsum/n - avg*avg) + print('avg %g, stddev %g, min %g, max %g\n' % \ + (avg, stddev, smallest, largest)) + + +def _test(N=2000): + _test_generator(N, random, ()) + _test_generator(N, normalvariate, (0.0, 1.0)) + _test_generator(N, lognormvariate, (0.0, 1.0)) + _test_generator(N, vonmisesvariate, (0.0, 1.0)) + _test_generator(N, gammavariate, (0.01, 1.0)) + _test_generator(N, gammavariate, (0.1, 1.0)) + _test_generator(N, gammavariate, (0.1, 2.0)) + _test_generator(N, gammavariate, (0.5, 1.0)) + _test_generator(N, gammavariate, (0.9, 1.0)) + _test_generator(N, gammavariate, (1.0, 1.0)) + _test_generator(N, gammavariate, (2.0, 1.0)) + _test_generator(N, gammavariate, (20.0, 1.0)) + _test_generator(N, gammavariate, (200.0, 1.0)) + _test_generator(N, gauss, (0.0, 1.0)) + _test_generator(N, betavariate, (3.0, 3.0)) + _test_generator(N, triangular, (0.0, 1.0, 1.0/3.0)) + +# Create one instance, seeded from current time, and export its methods +# as module-level functions. The functions share state across all uses +#(both in the user's code and in the Python libraries), but that's fine +# for most programs and is easier for the casual user than making them +# instantiate their own Random() instance. 
+ +_inst = Random() +seed = _inst.seed +random = _inst.random +uniform = _inst.uniform +triangular = _inst.triangular +randint = _inst.randint +choice = _inst.choice +randrange = _inst.randrange +sample = _inst.sample +shuffle = _inst.shuffle +choices = _inst.choices +normalvariate = _inst.normalvariate +lognormvariate = _inst.lognormvariate +expovariate = _inst.expovariate +vonmisesvariate = _inst.vonmisesvariate +gammavariate = _inst.gammavariate +gauss = _inst.gauss +betavariate = _inst.betavariate +paretovariate = _inst.paretovariate +weibullvariate = _inst.weibullvariate +getstate = _inst.getstate +setstate = _inst.setstate +getrandbits = _inst.getrandbits + +if __name__ == '__main__': + _test() diff --git a/Lib/tempfile.py b/Lib/tempfile.py index 469a1bb3be..61462357c7 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -42,8 +42,7 @@ import os as _os import shutil as _shutil import errno as _errno -# XXX RustPython TODO: _random -#from random import Random as _Random +from random import Random as _Random import weakref as _weakref try: @@ -157,11 +156,7 @@ def __iter__(self): def __next__(self): c = self.characters - def choose(s): - import math, random - return s[math.floor(random.random() * len(s))] - # XXX RustPython TODO: proper random impl - # choose = self.rng.choose + choose = self.rng.choice letters = [choose(c) for dummy in range(8)] return ''.join(letters) diff --git a/tests/snippets/stdlib_random.py b/tests/snippets/stdlib_random.py new file mode 100644 index 0000000000..81255ef5b8 --- /dev/null +++ b/tests/snippets/stdlib_random.py @@ -0,0 +1,29 @@ +import random + +random.seed(1234) + +# random.randint +assert random.randint(1, 11) == 8 + +# random.shuffle +left = list(range(10)) +right = [2, 7, 3, 5, 8, 4, 6, 9, 0, 1] +random.shuffle(left) +assert left == right + +# random.choice +assert random.choice(left) == 5 + +# random.choices +expected = ['red', 'green', 'red', 'black', 'black', 'red'] +result = random.choices(['red', 'black', 'green'], 
[18, 18, 2], k=6) +assert expected == result + +# random.sample +sampled = [0, 2, 1] +assert random.sample(list(range(3)), 3) == sampled + +# TODO : random.random(), random.uniform(), random.triangular(), +# random.betavariate, random.expovariate, random.gammavariate, +# random.gauss, random.lognormvariate, random.normalvariate, +# random.vonmisesvariate, random.paretovariate, random.weibullvariate diff --git a/vm/Cargo.toml b/vm/Cargo.toml index f404bb9cb1..fccb2dbcc5 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -31,7 +31,7 @@ num-integer = "0.1.41" num-rational = "0.2.2" num-iter = "0.1.39" rand = "0.7" -rand_distr = "0.2" +rand_core = "0.5" getrandom = "0.1" log = "0.4" rustpython-derive = {path = "../derive", version = "0.1.1"} diff --git a/vm/src/stdlib/mod.rs b/vm/src/stdlib/mod.rs index d3dd0e7329..52ae1a2c9d 100644 --- a/vm/src/stdlib/mod.rs +++ b/vm/src/stdlib/mod.rs @@ -78,7 +78,7 @@ pub fn get_module_inits() -> HashMap { "_operator".to_string() => Box::new(operator::make_module), "platform".to_string() => Box::new(platform::make_module), "regex_crate".to_string() => Box::new(re::make_module), - "random".to_string() => Box::new(random::make_module), + "_random".to_string() => Box::new(random::make_module), "_string".to_string() => Box::new(string::make_module), "struct".to_string() => Box::new(pystruct::make_module), "_thread".to_string() => Box::new(thread::make_module), diff --git a/vm/src/stdlib/random.rs b/vm/src/stdlib/random.rs index 685088f6af..b37cca4cc4 100644 --- a/vm/src/stdlib/random.rs +++ b/vm/src/stdlib/random.rs @@ -1,46 +1,129 @@ //! Random module. 
-use rand::distributions::Distribution;
-use rand_distr::Normal;
+use std::cell::RefCell;

-use crate::pyobject::{PyObjectRef, PyResult};
-use crate::vm::VirtualMachine;
+use num_bigint::{BigInt, Sign};
+use num_traits::Signed;
+use rand::RngCore;

-pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
-    let ctx = &vm.ctx;
+use crate::function::OptionalOption;
+use crate::obj::objint::PyIntRef;
+use crate::obj::objtype::PyClassRef;
+use crate::pyobject::{PyClassImpl, PyObjectRef, PyRef, PyResult, PyValue};
+use crate::VirtualMachine;

-    py_module!(vm, "random", {
-        "gauss" => ctx.new_function(random_normalvariate), // TODO: is this the same?
-        "normalvariate" => ctx.new_function(random_normalvariate),
-        "random" => ctx.new_function(random_random),
-        // "weibull", ctx.new_function(random_weibullvariate),
-    })
+mod mersenne;
+
+/// Random source selected by `Random.seed()`.
+///
+/// `Std` wraps `rand`'s thread-local generator and is the default; `MT` holds
+/// a boxed Mersenne Twister built from an explicit seed. The twister is boxed
+/// so the enum does not carry its state array inline.
+#[derive(Debug)]
+enum PyRng {
+    Std(rand::rngs::ThreadRng),
+    MT(Box<mersenne::MT19937>),
+}
+
+impl Default for PyRng {
+    /// Unseeded generators start on the thread-local RNG.
+    fn default() -> Self {
+        PyRng::Std(rand::thread_rng())
+    }
 }

-fn random_normalvariate(mu: f64, sigma: f64, vm: &VirtualMachine) -> PyResult {
-    let normal = Normal::new(mu, sigma).map_err(|rand_err| {
-        vm.new_exception_msg(
-            vm.ctx.exceptions.arithmetic_error.clone(),
-            format!("invalid normal distribution: {:?}", rand_err),
-        )
-    })?;
-    let value = normal.sample(&mut rand::thread_rng());
-    Ok(value)
+/// Forwards every `RngCore` call to whichever variant is active.
+impl RngCore for PyRng {
+    fn next_u32(&mut self) -> u32 {
+        match self {
+            Self::Std(s) => s.next_u32(),
+            Self::MT(m) => m.next_u32(),
+        }
+    }
+    fn next_u64(&mut self) -> u64 {
+        match self {
+            Self::Std(s) => s.next_u64(),
+            Self::MT(m) => m.next_u64(),
+        }
+    }
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        match self {
+            Self::Std(s) => s.fill_bytes(dest),
+            Self::MT(m) => m.fill_bytes(dest),
+        }
+    }
+    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand::Error> {
+        match self {
+            Self::Std(s) => s.try_fill_bytes(dest),
+            Self::MT(m) => m.try_fill_bytes(dest),
+        }
+    }
 }

-fn random_random(_vm: &VirtualMachine) -> f64 {
-
rand::random()
+/// Native object behind Python's `_random.Random`.
+///
+/// The generator sits in a `RefCell` so the `&self` methods below can
+/// advance or replace it.
+#[pyclass(name = "Random")]
+#[derive(Debug)]
+struct PyRandom {
+    rng: RefCell<PyRng>,
+}
+
+impl PyValue for PyRandom {
+    fn class(vm: &VirtualMachine) -> PyClassRef {
+        vm.class("_random", "Random")
+    }
+}
+
+#[pyimpl(flags(BASETYPE))]
+impl PyRandom {
+    /// Creates an instance of `cls` backed by the default `PyRng`.
+    #[pyslot(new)]
+    fn new(cls: PyClassRef, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
+        PyRandom {
+            rng: RefCell::new(PyRng::default()),
+        }
+        .into_ref_with_type(vm, cls)
+    }
+
+    /// Draws the next float via `mersenne::gen_res53`.
+    #[pymethod]
+    fn random(&self) -> f64 {
+        mersenne::gen_res53(&mut *self.rng.borrow_mut())
+    }
+
+    /// Replaces the generator.
+    ///
+    /// `None` (or no argument) switches back to the default `PyRng`; an int
+    /// seeds a Mersenne Twister from the 32-bit digits of its absolute value.
+    #[pymethod]
+    fn seed(&self, n: OptionalOption<PyIntRef>) {
+        let new_rng = match n.flat_option() {
+            None => PyRng::default(),
+            Some(n) => {
+                // `to_u32_digits` is least-significant-digit first on every
+                // target, which is the key order the twister's array seeding
+                // expects, so no endian-dependent reversal is applied.
+                let (_, mut key) = n.as_bigint().abs().to_u32_digits();
+                // A zero seed may produce no digits at all; seed with a single
+                // 0 word rather than handing over an empty key, which
+                // `seed_slice` would index out of bounds.
+                if key.is_empty() {
+                    key.push(0);
+                }
+                PyRng::MT(Box::new(mersenne::MT19937::new_with_slice_seed(&key)))
+            }
+        };
+
+        *self.rng.borrow_mut() = new_rng;
+    }
+
+    /// Returns a non-negative int made of `k` random bits.
+    ///
+    /// Raises `ValueError` when `k` is 0, as CPython's `_random` of the same
+    /// vintage as the bundled `random.py` does.
+    #[pymethod]
+    fn getrandbits(&self, k: usize, vm: &VirtualMachine) -> PyResult<BigInt> {
+        if k == 0 {
+            // `32 - 0` would be an out-of-range shift below.
+            return Err(vm.new_value_error(
+                "number of bits must be greater than zero".to_owned(),
+            ));
+        }
+
+        let mut rng = self.rng.borrow_mut();
+
+        // Keep the top `bits` (1..=32) bits of one fresh 32-bit word.
+        let mut gen_u32 = |bits: usize| rng.next_u32() >> (32 - bits) as u32;
+
+        if k <= 32 {
+            return Ok(gen_u32(k).into());
+        }
+
+        // One u32 per 32 requested bits, rounded up.
+        let words = (k - 1) / 32 + 1;
+        let mut wordarray = vec![0u32; words];
+
+        // Fill least-significant word first; only the last (most significant)
+        // word is partial, so the result never exceeds `k` bits.
+        // `BigInt::from_slice` reads digits in this order on every target.
+        let mut remaining = k;
+        for word in wordarray.iter_mut() {
+            *word = gen_u32(remaining.min(32));
+            remaining = remaining.saturating_sub(32);
+        }
+
+        // `Sign::NoSign` would force the value to zero regardless of digits.
+        Ok(BigInt::from_slice(Sign::Plus, &wordarray))
+    }
 }

-/*
- * TODO: enable this function:
-fn random_weibullvariate(vm: &VirtualMachine, args: PyFuncArgs) -> PyResult {
-    arg_check!(vm, args, required = [(alpha, Some(vm.ctx.float_type())), (beta, Some(vm.ctx.float_type()))]);
-    let alpha = objfloat::get_value(alpha);
-    let beta = objfloat::get_value(beta);
-    let weibull = Weibull::new(alpha, beta);
-    let value = weibull.sample(&mut rand::thread_rng());
-    let py_value = vm.ctx.new_float(value);
-    Ok(py_value)
+pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
+    let ctx = &vm.ctx;
+    py_module!(vm, "_random", {
+        "Random" =>
PyRandom::make_class(ctx), + }) } -*/ diff --git a/vm/src/stdlib/random/mersenne.rs b/vm/src/stdlib/random/mersenne.rs new file mode 100644 index 0000000000..b0f802ffa4 --- /dev/null +++ b/vm/src/stdlib/random/mersenne.rs @@ -0,0 +1,211 @@ +#![allow(clippy::unreadable_literal)] + +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
   A C-program for MT19937, with initialization improved 2002/1/26.
   Coded by Takuji Nishimura and Makoto Matsumoto.

   Before using, initialize the state by using init_genrand(seed)
   or init_by_array(init_key, key_length).

   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

     1. Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.

     2. Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.

     3. The names of its contributors may not be used to endorse or promote
        products derived from this software without specific prior written
        permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


   Any feedback is very welcome.
   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
*/

// this was translated from c; all rights go to copyright holders listed above
// https://gist.github.com/coolreader18/b56d510f1b0551d2954d74ad289f7d2e

/* Period parameters */
const N: usize = 624;
const M: usize = 397;
const MATRIX_A: u32 = 0x9908_b0df; /* constant vector a */
const UPPER_MASK: u32 = 0x8000_0000; /* most significant w-r bits */
const LOWER_MASK: u32 = 0x7fff_ffff; /* least significant r bits */

/// State of an MT19937 (32-bit Mersenne Twister) generator.
pub struct MT19937 {
    /// The state vector.
    mt: [u32; N],
    /// Index of the next word of `mt` to temper and return.
    /// `N` means the vector is exhausted; `N + 1` means it was never seeded.
    mti: usize,
}

impl Default for MT19937 {
    /// Returns an unseeded generator; the first draw seeds it with 5489.
    fn default() -> Self {
        MT19937 {
            mt: [0; N],
            mti: N + 1,
        }
    }
}

impl std::fmt::Debug for MT19937 {
    /// Prints only the type name; the 624-word state is deliberately omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.pad("MT19937")
    }
}

impl MT19937 {
    /// Creates a generator seeded from `init_key` (see [`MT19937::seed_slice`]).
    pub fn new_with_slice_seed(init_key: &[u32]) -> Self {
        let mut state = Self::default();
        state.seed_slice(init_key);
        state
    }

    /// Initializes the whole state vector from the single seed word `s`.
    ///
    /// The recurrence is defined modulo 2^32, so every step uses wrapping
    /// arithmetic; a plain `+` panics in overflow-checked builds.
    fn seed(&mut self, s: u32) {
        self.mt[0] = s;
        for i in 1..N {
            let prev = self.mt[i - 1];
            /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
            /* In the previous versions, MSBs of the seed affect   */
            /* only MSBs of the array self.mt[].                   */
            /* 2002/01/09 modified by Makoto Matsumoto             */
            self.mt[i] = 1_812_433_253u32
                .wrapping_mul(prev ^ (prev >> 30))
                .wrapping_add(i as u32);
        }
        // Vector is full but not yet twisted: the next draw regenerates it.
        self.mti = N;
    }

    /// Initializes the state from an array of key words.
    ///
    /// The key is consumed cyclically for `max(N, init_key.len())` rounds.
    /// An empty key is treated like `[0]` instead of indexing out of bounds.
    /// All arithmetic is modulo 2^32 (wrapping), so key words of any
    /// magnitude are accepted.
    pub fn seed_slice(&mut self, init_key: &[u32]) {
        self.seed(19_650_218);

        let key_len = init_key.len();
        let mut i = 1;
        let mut j = 0;

        for _ in 0..N.max(key_len) {
            let prev = self.mt[i - 1];
            let key_word = init_key.get(j).copied().unwrap_or(0);
            self.mt[i] = (self.mt[i] ^ (prev ^ (prev >> 30)).wrapping_mul(1_664_525))
                .wrapping_add(key_word)
                .wrapping_add(j as u32); /* non linear */
            i += 1;
            j += 1;
            if i >= N {
                self.mt[0] = self.mt[N - 1];
                i = 1;
            }
            if j >= key_len {
                j = 0;
            }
        }

        for _ in 0..N - 1 {
            let prev = self.mt[i - 1];
            self.mt[i] = (self.mt[i] ^ (prev ^ (prev >> 30)).wrapping_mul(1_566_083_941))
                .wrapping_sub(i as u32); /* non linear */
            i += 1;
            if i >= N {
                self.mt[0] = self.mt[N - 1];
                i = 1;
            }
        }

        self.mt[0] = 0x8000_0000; /* MSB is 1; assuring non-zero initial array */
    }

    /// Regenerates all `N` words of the state vector in place.
    ///
    /// Indices wrap modulo `N`; because words are rewritten in ascending
    /// order, wrapped reads see already-updated words.
    fn refill(&mut self) {
        for kk in 0..N {
            let y = (self.mt[kk] & UPPER_MASK) | (self.mt[(kk + 1) % N] & LOWER_MASK);
            /* mag01[x] = x * MATRIX_A  for x=0,1 */
            let mag01 = if y & 1 == 1 { MATRIX_A } else { 0 };
            self.mt[kk] = self.mt[(kk + M) % N] ^ (y >> 1) ^ mag01;
        }
        self.mti = 0;
    }

    /// Generates a random number on the [0, 0xffffffff] interval.
    fn gen_u32(&mut self) -> u32 {
        if self.mti >= N {
            if self.mti == N + 1 {
                /* if seed() has not been called, a default initial seed is used */
                self.seed(5489);
            }
            /* generate N words at one time */
            self.refill();
        }

        let mut y = self.mt[self.mti];
        self.mti += 1;

        /* Tempering */
        y ^= y >> 11;
        y ^= (y << 7) & 0x9d2c_5680;
        y ^= (y << 15) & 0xefc6_0000;
        y ^= y >> 18;

        y
    }
}
self.mt[kk.wrapping_add(M.wrapping_sub(N))] ^ (y >> 1) ^ mag01(y); + } + y = (self.mt[N - 1] & UPPER_MASK) | (self.mt[0] & LOWER_MASK); + self.mt[N - 1] = self.mt[M - 1] ^ (y >> 1) ^ mag01(y); + + self.mti = 0; + } + + y = self.mt[self.mti]; + self.mti += 1; + + /* Tempering */ + y ^= y >> 11; + y ^= (y << 7) & 0x9d2c5680u32; + y ^= (y << 15) & 0xefc60000u32; + y ^= y >> 18; + + y + } +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +pub fn gen_res53(rng: &mut R) -> f64 { + let a = rng.next_u32() >> 5; + let b = rng.next_u32() >> 6; + (a as f64 * 67108864.0 + b as f64) * (1.0 / 9007199254740992.0) +} +/* These real versions are due to Isaku Wada, 2002/01/09 added */ + +impl rand::RngCore for MT19937 { + fn next_u32(&mut self) -> u32 { + self.gen_u32() + } + fn next_u64(&mut self) -> u64 { + rand_core::impls::next_u64_via_u32(self) + } + fn fill_bytes(&mut self, dest: &mut [u8]) { + rand_core::impls::fill_bytes_via_next(self, dest) + } + fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand::Error> { + self.fill_bytes(dest); + Ok(()) + } +}