Commit e20f82a3 authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'rng' into 'master'

Make the RNG node behave more like a regular node

See merge request pycodegen/pystencils!194
parents cf0dd636 fd7730d0
...@@ -142,7 +142,7 @@ pycodegen-integration: ...@@ -142,7 +142,7 @@ pycodegen-integration:
- cd ../pygrandchem - cd ../pygrandchem
- py.test -v -n $NUM_CORES . - py.test -v -n $NUM_CORES .
- cd ../walberla/build/ - cd ../walberla/build/
- make CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample UniformGridBenchmarkGPU_trt UniformGridBenchmarkGPU_entropic_kbc_n4 - make CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample UniformGridBenchmarkGPU_trt UniformGridBenchmarkGPU_entropic_kbc_n4 FluctuatingMRT
- cd apps/benchmarks/UniformGridGPU - cd apps/benchmarks/UniformGridGPU
- make -j $NUM_CORES - make -j $NUM_CORES
- cd ../UniformGridGenerated - cd ../UniformGridGenerated
......
...@@ -37,8 +37,12 @@ class Node: ...@@ -37,8 +37,12 @@ class Node:
def subs(self, subs_dict) -> None: def subs(self, subs_dict) -> None:
"""Inplace! Substitute, similar to sympy's but modifies the AST inplace.""" """Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
for a in self.args: for i, a in enumerate(self.args):
a.subs(subs_dict) result = a.subs(subs_dict)
if isinstance(a, sp.Expr): # sympy expressions' subs is out-of-place
self.args[i] = result
else: # all other should be in-place
assert result is None
@property @property
def func(self): def func(self):
......
...@@ -19,11 +19,9 @@ def _get_rng_template(name, data_type, num_vars): ...@@ -19,11 +19,9 @@ def _get_rng_template(name, data_type, num_vars):
return template return template
def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordinates, keys, dim, result_symbols): def _get_rng_code(template, dialect, vector_instruction_set, args, result_symbols):
parameters = [time_step] + coordinates + [0] * (3 - dim) + list(keys)
if dialect == 'cuda' or (dialect == 'c' and vector_instruction_set is None): if dialect == 'cuda' or (dialect == 'c' and vector_instruction_set is None):
return template.format(parameters=', '.join(str(p) for p in parameters), return template.format(parameters=', '.join(str(a) for a in args),
result_symbols=result_symbols) result_symbols=result_symbols)
else: else:
raise NotImplementedError("Not yet implemented for this backend") raise NotImplementedError("Not yet implemented for this backend")
...@@ -31,47 +29,38 @@ def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordina ...@@ -31,47 +29,38 @@ def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordina
class RNGBase(CustomCodeNode): class RNGBase(CustomCodeNode):
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=(0, 0, 0), keys=None): id = 0
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=None, keys=None):
if keys is None: if keys is None:
keys = (0,) * self._num_keys keys = (0,) * self._num_keys
if offsets is None:
offsets = (0,) * dim
if len(keys) != self._num_keys: if len(keys) != self._num_keys:
raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}") raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}")
if len(offsets) != 3: if len(offsets) != dim:
raise ValueError(f"Provided {len(offsets)} offsets but need {3}") raise ValueError(f"Provided {len(offsets)} offsets but need {dim}")
self.result_symbols = tuple(TypedSymbol(sp.Dummy().name, self._data_type) for _ in range(self._num_vars)) coordinates = [LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i] for i in range(dim)]
symbols_read = [s for s in keys if isinstance(s, sp.Symbol)] if dim < 3:
coordinates.append(0)
self._args = sp.sympify([time_step, *coordinates, *keys])
self.result_symbols = tuple(TypedSymbol(f'random_{self.id}_{i}', self._data_type)
for i in range(self._num_vars))
symbols_read = set.union(*[s.atoms(sp.Symbol) for s in self.args])
super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols) super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols)
self._time_step = time_step
self._offsets = offsets
self._coordinates = [LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i] for i in range(dim)]
self.headers = [f'"{self._name}_rand.h"'] self.headers = [f'"{self._name}_rand.h"']
self.keys = tuple(keys)
self._args = sp.sympify((dim, time_step, keys)) RNGBase.id += 1
self._dim = dim
@property @property
def args(self): def args(self):
return self._args return self._args
@property
def undefined_symbols(self):
result = {a for a in (self._time_step, *self._offsets, *self.keys) if isinstance(a, sp.Symbol)}
loop_counters = [LoopOverCoordinate.get_loop_counter_symbol(i)
for i in range(self._dim)]
result.update(loop_counters)
return result
def subs(self, subs_dict) -> None:
for i in range(len(self._coordinates)):
self._coordinates[i] = self._coordinates[i].subs(subs_dict)
def fast_subs(self, *_):
return self # nothing to replace inside this node - would destroy intermediate "dummy" by re-creating them
def get_code(self, dialect, vector_instruction_set): def get_code(self, dialect, vector_instruction_set):
template = _get_rng_template(self._name, self._data_type, self._num_vars) template = _get_rng_template(self._name, self._data_type, self._num_vars)
return _get_rng_code(template, dialect, vector_instruction_set, return _get_rng_code(template, dialect, vector_instruction_set, self.args, self.result_symbols)
self._time_step, self._coordinates, self.keys, self._dim, self.result_symbols)
def __repr__(self): def __repr__(self):
return (", ".join(['{}'] * self._num_vars) + " \\leftarrow {}RNG").format(*self.result_symbols, return (", ".join(['{}'] * self._num_vars) + " \\leftarrow {}RNG").format(*self.result_symbols,
......
...@@ -12,58 +12,58 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302], ...@@ -12,58 +12,58 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
[[2958765206, 3725192638, 2623672781, 1373196132], [[2958765206, 3725192638, 2623672781, 1373196132],
[ 850605163, 1694561295, 3285694973, 2799652583]]]) [ 850605163, 1694561295, 3285694973, 2799652583]]])
def test_philox_double(): @pytest.mark.parametrize('target', ('cpu', 'gpu'))
for target in ('cpu', 'gpu'): def test_philox_double(target):
if target == 'gpu': if target == 'gpu':
pytest.importorskip('pycuda') pytest.importorskip('pycuda')
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target) dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
f = dh.add_array("f", values_per_cell=2) f = dh.add_array("f", values_per_cell=2)
dh.fill('f', 42.0) dh.fill('f', 42.0)
philox_node = PhiloxTwoDoubles(dh.dim) philox_node = PhiloxTwoDoubles(dh.dim)
assignments = [philox_node, assignments = [philox_node,
ps.Assignment(f(0), philox_node.result_symbols[0]), ps.Assignment(f(0), philox_node.result_symbols[0]),
ps.Assignment(f(1), philox_node.result_symbols[1])] ps.Assignment(f(1), philox_node.result_symbols[1])]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile() kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu() dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124) dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu() dh.all_to_cpu()
arr = dh.gather_array('f') arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all() assert np.logical_and(arr <= 1.0, arr >= 0).all()
x = philox_reference[:,:,0::2] x = philox_reference[:,:,0::2]
y = philox_reference[:,:,1::2] y = philox_reference[:,:,1::2]
z = x ^ y << (53 - 32) z = x ^ y << (53 - 32)
double_reference = z * 2.**-53 + 2.**-54 double_reference = z * 2.**-53 + 2.**-54
assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps)) assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps))
def test_philox_float(): @pytest.mark.parametrize('target', ('cpu', 'gpu'))
for target in ('cpu', 'gpu'): def test_philox_float(target):
if target == 'gpu': if target == 'gpu':
pytest.importorskip('pycuda') pytest.importorskip('pycuda')
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target) dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
f = dh.add_array("f", values_per_cell=4) f = dh.add_array("f", values_per_cell=4)
dh.fill('f', 42.0) dh.fill('f', 42.0)
philox_node = PhiloxFourFloats(dh.dim) philox_node = PhiloxFourFloats(dh.dim)
assignments = [philox_node] + [ps.Assignment(f(i), philox_node.result_symbols[i]) for i in range(4)] assignments = [philox_node] + [ps.Assignment(f(i), philox_node.result_symbols[i]) for i in range(4)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile() kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu() dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124) dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu() dh.all_to_cpu()
arr = dh.gather_array('f') arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all() assert np.logical_and(arr <= 1.0, arr >= 0).all()
float_reference = philox_reference * 2.**-32 + 2.**-33 float_reference = philox_reference * 2.**-32 + 2.**-33
assert(np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps)) assert(np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps))
def test_aesni_double(): def test_aesni_double():
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu") dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment