Commit e20f82a3 authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'rng' into 'master'

Make the RNG node behave more like a regular node

See merge request !194
parents cf0dd636 fd7730d0
...@@ -142,7 +142,7 @@ pycodegen-integration: ...@@ -142,7 +142,7 @@ pycodegen-integration:
- cd ../pygrandchem - cd ../pygrandchem
- py.test -v -n $NUM_CORES . - py.test -v -n $NUM_CORES .
- cd ../walberla/build/ - cd ../walberla/build/
- make CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample UniformGridBenchmarkGPU_trt UniformGridBenchmarkGPU_entropic_kbc_n4 - make CodegenJacobiCPU CodegenJacobiGPU CodegenPoissonCPU CodegenPoissonGPU MicroBenchmarkGpuLbm LbCodeGenerationExample UniformGridBenchmarkGPU_trt UniformGridBenchmarkGPU_entropic_kbc_n4 FluctuatingMRT
- cd apps/benchmarks/UniformGridGPU - cd apps/benchmarks/UniformGridGPU
- make -j $NUM_CORES - make -j $NUM_CORES
- cd ../UniformGridGenerated - cd ../UniformGridGenerated
......
...@@ -37,8 +37,12 @@ class Node: ...@@ -37,8 +37,12 @@ class Node:
def subs(self, subs_dict) -> None: def subs(self, subs_dict) -> None:
"""Inplace! Substitute, similar to sympy's but modifies the AST inplace.""" """Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
for a in self.args: for i, a in enumerate(self.args):
a.subs(subs_dict) result = a.subs(subs_dict)
if isinstance(a, sp.Expr): # sympy expressions' subs is out-of-place
self.args[i] = result
else: # all other should be in-place
assert result is None
@property @property
def func(self): def func(self):
......
...@@ -19,11 +19,9 @@ def _get_rng_template(name, data_type, num_vars): ...@@ -19,11 +19,9 @@ def _get_rng_template(name, data_type, num_vars):
return template return template
def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordinates, keys, dim, result_symbols): def _get_rng_code(template, dialect, vector_instruction_set, args, result_symbols):
parameters = [time_step] + coordinates + [0] * (3 - dim) + list(keys)
if dialect == 'cuda' or (dialect == 'c' and vector_instruction_set is None): if dialect == 'cuda' or (dialect == 'c' and vector_instruction_set is None):
return template.format(parameters=', '.join(str(p) for p in parameters), return template.format(parameters=', '.join(str(a) for a in args),
result_symbols=result_symbols) result_symbols=result_symbols)
else: else:
raise NotImplementedError("Not yet implemented for this backend") raise NotImplementedError("Not yet implemented for this backend")
...@@ -31,47 +29,38 @@ def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordina ...@@ -31,47 +29,38 @@ def _get_rng_code(template, dialect, vector_instruction_set, time_step, coordina
class RNGBase(CustomCodeNode): class RNGBase(CustomCodeNode):
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=(0, 0, 0), keys=None): id = 0
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=None, keys=None):
if keys is None: if keys is None:
keys = (0,) * self._num_keys keys = (0,) * self._num_keys
if offsets is None:
offsets = (0,) * dim
if len(keys) != self._num_keys: if len(keys) != self._num_keys:
raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}") raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}")
if len(offsets) != 3: if len(offsets) != dim:
raise ValueError(f"Provided {len(offsets)} offsets but need {3}") raise ValueError(f"Provided {len(offsets)} offsets but need {dim}")
self.result_symbols = tuple(TypedSymbol(sp.Dummy().name, self._data_type) for _ in range(self._num_vars)) coordinates = [LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i] for i in range(dim)]
symbols_read = [s for s in keys if isinstance(s, sp.Symbol)] if dim < 3:
coordinates.append(0)
self._args = sp.sympify([time_step, *coordinates, *keys])
self.result_symbols = tuple(TypedSymbol(f'random_{self.id}_{i}', self._data_type)
for i in range(self._num_vars))
symbols_read = set.union(*[s.atoms(sp.Symbol) for s in self.args])
super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols) super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols)
self._time_step = time_step
self._offsets = offsets
self._coordinates = [LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i] for i in range(dim)]
self.headers = [f'"{self._name}_rand.h"'] self.headers = [f'"{self._name}_rand.h"']
self.keys = tuple(keys)
self._args = sp.sympify((dim, time_step, keys)) RNGBase.id += 1
self._dim = dim
@property @property
def args(self): def args(self):
return self._args return self._args
@property
def undefined_symbols(self):
result = {a for a in (self._time_step, *self._offsets, *self.keys) if isinstance(a, sp.Symbol)}
loop_counters = [LoopOverCoordinate.get_loop_counter_symbol(i)
for i in range(self._dim)]
result.update(loop_counters)
return result
def subs(self, subs_dict) -> None:
for i in range(len(self._coordinates)):
self._coordinates[i] = self._coordinates[i].subs(subs_dict)
def fast_subs(self, *_):
return self # nothing to replace inside this node - would destroy intermediate "dummy" by re-creating them
def get_code(self, dialect, vector_instruction_set): def get_code(self, dialect, vector_instruction_set):
template = _get_rng_template(self._name, self._data_type, self._num_vars) template = _get_rng_template(self._name, self._data_type, self._num_vars)
return _get_rng_code(template, dialect, vector_instruction_set, return _get_rng_code(template, dialect, vector_instruction_set, self.args, self.result_symbols)
self._time_step, self._coordinates, self.keys, self._dim, self.result_symbols)
def __repr__(self): def __repr__(self):
return (", ".join(['{}'] * self._num_vars) + " \\leftarrow {}RNG").format(*self.result_symbols, return (", ".join(['{}'] * self._num_vars) + " \\leftarrow {}RNG").format(*self.result_symbols,
......
...@@ -12,8 +12,8 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302], ...@@ -12,8 +12,8 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
[[2958765206, 3725192638, 2623672781, 1373196132], [[2958765206, 3725192638, 2623672781, 1373196132],
[ 850605163, 1694561295, 3285694973, 2799652583]]]) [ 850605163, 1694561295, 3285694973, 2799652583]]])
def test_philox_double(): @pytest.mark.parametrize('target', ('cpu', 'gpu'))
for target in ('cpu', 'gpu'): def test_philox_double(target):
if target == 'gpu': if target == 'gpu':
pytest.importorskip('pycuda') pytest.importorskip('pycuda')
...@@ -42,8 +42,8 @@ def test_philox_double(): ...@@ -42,8 +42,8 @@ def test_philox_double():
assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps)) assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps))
def test_philox_float(): @pytest.mark.parametrize('target', ('cpu', 'gpu'))
for target in ('cpu', 'gpu'): def test_philox_float(target):
if target == 'gpu': if target == 'gpu':
pytest.importorskip('pycuda') pytest.importorskip('pycuda')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment