diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h index bbf1c1ac0ed015539743e4f027ed875209d08f61..283204921079ebfd79e022af53b17963de874cf4 100644 --- a/pystencils/include/philox_rand.h +++ b/pystencils/include/philox_rand.h @@ -52,8 +52,7 @@ QUALIFIERS void _philox4x32bumpkey(uint32* key) QUALIFIERS double _uniform_double_hq(uint32 x, uint32 y) { - unsigned long long z = (unsigned long long)x ^ - ((unsigned long long)y << (53 - 32)); + uint64 z = (uint64)x ^ ((uint64)y << (53 - 32)); return z * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0); } diff --git a/pystencils/rng.py b/pystencils/rng.py index 198251624e5bae5493b10c6cf1bc06f84bc088c5..48f3fb9b6d5dfb0521d1cc1c2f233600397966f4 100644 --- a/pystencils/rng.py +++ b/pystencils/rng.py @@ -5,21 +5,18 @@ from pystencils import TypedSymbol from pystencils.astnodes import LoopOverCoordinate from pystencils.backends.cbackend import CustomCodeNode -philox_two_doubles_call = """ -{result_symbols[0].dtype} {result_symbols[0].name}; -{result_symbols[1].dtype} {result_symbols[1].name}; -philox_double2({parameters}, {result_symbols[0].name}, {result_symbols[1].name}); -""" -philox_four_floats_call = """ -{result_symbols[0].dtype} {result_symbols[0].name}; -{result_symbols[1].dtype} {result_symbols[1].name}; -{result_symbols[2].dtype} {result_symbols[2].name}; -{result_symbols[3].dtype} {result_symbols[3].name}; -philox_float4({parameters}, - {result_symbols[0].name}, {result_symbols[1].name}, {result_symbols[2].name}, {result_symbols[3].name}); - -""" +def _get_philox_template(data_type, num_vars): + if data_type is np.float32: + c_type = "float" + elif data_type is np.float64: + c_type = "double" + template = "\n" + for i in range(num_vars): + template += "{{result_symbols[{}].dtype}} {{result_symbols[{}].name}};\n".format(i, i) + template += ("philox_{}{}({{parameters}}, " + ", ".join(["{{result_symbols[{}].name}}"] * num_vars) + ");\n") \ + .format(c_type, num_vars, *tuple(range(num_vars))) + return template def _get_philox_code(template, dialect, vector_instruction_set, time_step, offsets, keys, dim, result_symbols): @@ -39,10 +36,10 @@ def _get_philox_code(template, dialect, vector_instruction_set, time_step, offse raise NotImplementedError("Not yet implemented for this backend") -class PhiloxTwoDoubles(CustomCodeNode): +class PhiloxBase(CustomCodeNode): def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=(0, 0, 0), keys=(0, 0)): - self.result_symbols = tuple(TypedSymbol(sp.Dummy().name, np.float64) for _ in range(2)) + self.result_symbols = tuple(TypedSymbol(sp.Dummy().name, self._data_type) for _ in range(self._num_vars)) symbols_read = [s for s in keys if isinstance(s, sp.Symbol)] super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols) self._time_step = time_step @@ -68,47 +65,22 @@ class PhiloxTwoDoubles(CustomCodeNode): return self # nothing to replace inside this node - would destroy intermediate "dummy" by re-creating them def get_code(self, dialect, vector_instruction_set): - return _get_philox_code(philox_two_doubles_call, dialect, vector_instruction_set, + template = _get_philox_template(self._data_type, self._num_vars) + return _get_philox_code(template, dialect, vector_instruction_set, self._time_step, self._offsets, self.keys, self._dim, self.result_symbols) def __repr__(self): - return "{}, {} <- PhiloxRNG".format(*self.result_symbols) - - -class PhiloxFourFloats(CustomCodeNode): - - def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=(0, 0, 0), keys=(0, 0)): - self.result_symbols = tuple(TypedSymbol(sp.Dummy().name, np.float32) for _ in range(4)) - symbols_read = [s for s in keys if isinstance(s, sp.Symbol)] - super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols) - self._time_step = time_step - self._offsets = offsets - self.headers = ['"philox_rand.h"'] - self.keys = tuple(keys) - self._args = sp.sympify((dim, time_step, offsets, keys)) - self._dim = dim - - @property - def args(self): - return self._args + return (", ".join(['{}'] * self._num_vars) + " <- PhiloxRNG").format(*self.result_symbols) - @property - def undefined_symbols(self): - result = {a for a in (self._time_step, *self._offsets, *self.keys) if isinstance(a, sp.Symbol)} - loop_counters = [LoopOverCoordinate.get_loop_counter_symbol(i) - for i in range(self._dim)] - result.update(loop_counters) - return result - def fast_subs(self, _): - return self # nothing to replace inside this node - would destroy intermediate "dummy" by re-creating them +class PhiloxTwoDoubles(PhiloxBase): + _data_type = np.float64 + _num_vars = 2 - def get_code(self, dialect, vector_instruction_set): - return _get_philox_code(philox_four_floats_call, dialect, vector_instruction_set, - self._time_step, self._offsets, self.keys, self._dim, self.result_symbols) - def __repr__(self): - return "{}, {}, {}, {} <- PhiloxRNG".format(*self.result_symbols) +class PhiloxFourFloats(PhiloxBase): + _data_type = np.float32 + _num_vars = 4 def random_symbol(assignment_list, seed=TypedSymbol("seed", np.uint32), rng_node=PhiloxTwoDoubles, *args, **kwargs): diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py index f86644875acce6679473336705b929f5f3083e6e..1c5ee9cbe143f81a253de57cf23d7b75914a9d82 100644 --- a/pystencils_tests/test_random.py +++ b/pystencils_tests/test_random.py @@ -4,6 +4,12 @@ import pystencils as ps from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles +# curand_Philox4x32_10(make_uint4(124, i, j, 0), make_uint2(0, 0)) +philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302], + [1032407765, 970978240, 2217005168, 2424826293]], + [[2958765206, 3725192638, 2623672781, 1373196132], + [ 850605163, 1694561295, 3285694973, 2799652583]]]) + def test_philox_double(): for target in ('cpu', 'gpu'): dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target) @@ -24,6 +30,12 @@ def test_philox_double(): arr = dh.gather_array('f') assert np.logical_and(arr <= 1.0, arr >= 0).all() + x = philox_reference[:,:,0::2] + y = philox_reference[:,:,1::2] + z = x ^ y << (53 - 32) + double_reference = z * 2.**-53 + 2.**-54 + assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps)) + def test_philox_float(): for target in ('cpu', 'gpu'): @@ -41,3 +53,6 @@ def test_philox_float(): dh.all_to_cpu() arr = dh.gather_array('f') assert np.logical_and(arr <= 1.0, arr >= 0).all() + + float_reference = philox_reference * 2.**-32 + 2.**-33 + assert(np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps))