Commit 0cac3c27 authored by Christoph Alt

Merge branch 'FixIterationGPU' into 'master'

[FIX] Iteration slices with GPU kernels

Closes #58

See merge request !317
parents 262446d1 5e12eabd
Pipeline #51669 passed with stages in 24 minutes and 1 second
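For context, issue #58 concerns iteration slices with a step greater than one: the GPU indexing derived coordinates directly from block and thread indices and did not account for the slice stride. Below is a minimal sketch of the affected scenario, built only from the public API that the new test in this merge request also uses (it requires a CUDA-capable setup with pycuda; the pre-fix misbehaviour is inferred from the diff, not reproduced here):

import pystencils as ps

# Write 1.0 on every second interior cell only, via a strided iteration slice.
dh = ps.create_data_handling(domain_size=(8, 8), default_target=ps.Target.GPU)
f = dh.add_array("f", values_per_cell=1)

config = ps.CreateKernelConfig(target=dh.default_target,
                               iteration_slice=ps.make_slice[1:-1:2, 1:-1:2])
kernel = ps.create_kernel([ps.Assignment(f.center, 1.0)], config=config).compile()

dh.fill(f.name, 0.0, ghost_layers=True)
dh.to_gpu(f.name)
dh.run_kernel(kernel)
dh.to_cpu(f.name)
# After this fix the 2-strided pattern is respected; before, the generated
# guard only bounded the coordinates, so every in-bounds thread wrote.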
@@ -124,12 +124,18 @@ class BlockIndexing(AbstractIndexing):
         self._symbolic_shape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
         self._compile_time_block_size = compile_time_block_size
 
+    @property
+    def cuda_indices(self):
+        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
+        indices = [block_index * bs + thread_idx
+                   for block_index, bs, thread_idx in zip(BLOCK_IDX, block_size, THREAD_IDX)]
+
+        return indices[:self._dim]
+
     @property
     def coordinates(self):
         offsets = _get_start_from_slice(self._iterationSlice)
-        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
-        coordinates = [block_index * bs + thread_idx + off
-                       for block_index, bs, thread_idx, off in zip(BLOCK_IDX, block_size, THREAD_IDX, offsets)]
+        coordinates = [c + off for c, off in zip(self.cuda_indices, offsets)]
 
         return coordinates[:self._dim]
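This refactoring factors the raw thread position out of coordinates: cuda_indices linearises block and thread indices per dimension, and coordinates merely adds the iteration-slice start offsets on top. A standalone sympy sketch of the same arithmetic (the symbol names are illustrative, not the ones pystencils generates):

import sympy as sp

BLOCK_IDX = sp.symbols("blockIdx_x blockIdx_y")
THREAD_IDX = sp.symbols("threadIdx_x threadIdx_y")
block_size = (128, 2)                 # compile-time block size per dimension
offsets = sp.symbols("off_x off_y")   # slice start per dimension

# cuda_indices: raw global thread position per dimension
cuda_indices = [bi * bs + ti for bi, bs, ti in zip(BLOCK_IDX, block_size, THREAD_IDX)]
# coordinates: shift each raw index by the iteration-slice start
coordinates = [c + off for c, off in zip(cuda_indices, offsets)]
print(coordinates)  # each entry: block_index * block_size + thread_index + slice_offset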
@@ -159,8 +165,13 @@ class BlockIndexing(AbstractIndexing):
     def guard(self, kernel_content, arr_shape):
         arr_shape = arr_shape[:self._dim]
-        conditions = [c < end
-                      for c, end in zip(self.coordinates, _get_end_from_slice(self._iterationSlice, arr_shape))]
+        end = _get_end_from_slice(self._iterationSlice, arr_shape)
+
+        conditions = [c < e for c, e in zip(self.coordinates, end)]
+
+        for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
+            if iter_slice.step > 1:
+                conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
 
         condition = conditions[0]
         for c in conditions[1:]:
             condition = sp.And(condition, c)
...
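The behavioural change sits in guard: a dense grid of threads is still launched, and for a slice step greater than one the new Mod condition masks out every thread whose raw index is not a multiple of the step. A small sympy sketch of the resulting guard in one dimension (symbol name and values are illustrative):

import sympy as sp

cuda_index = sp.Symbol("ctr")      # raw thread index in one dimension
step, end = 2, 10                  # from an iteration slice like [1:-1:2]

condition = sp.And(cuda_index + 1 < end,               # offset coordinate stays below the slice end
                   sp.Eq(sp.Mod(cuda_index, step), 0)) # only every `step`-th thread participates

# threads 0, 2, 4, ... pass the guard; 1, 3, 5, ... are masked out
print([i for i in range(10) if condition.subs(cuda_index, i)])  # [0, 2, 4, 6, 8]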
@@ -171,12 +171,13 @@ class TypeAdder:
             args_types = [self.figure_out_type(a) for a in expr.args]
             new_args = [a if t.dtype_eq(bool_type) else BooleanCastFunc(a, bool_type) for a, t in args_types]
             return expr.func(*new_args), bool_type
-        elif type(expr, ) in pystencils.integer_functions.__dict__.values():
+        elif type(expr, ) in pystencils.integer_functions.__dict__.values() or isinstance(expr, sp.Mod):
             args_types = [self.figure_out_type(a) for a in expr.args]
             collated_type = collate_types([t for _, t in args_types])
             # TODO: should we downcast to integer? If yes then which integer type?
             if not collated_type.is_int():
-                raise ValueError(f"Integer functions need to be used with integer types but {collated_type} was given")
+                raise ValueError(f"Integer functions or Modulo need to be used with integer types "
+                                 f"but {collated_type} was given")
             return expr, collated_type
         elif isinstance(expr, flag_cond):
...
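The TypeAdder change routes sp.Mod through the same branch as pystencils' integer functions, so modulo expressions over integer-typed operands now type-check, while float operands hit the reworded ValueError. A sketch of the rule, assuming the public TypedSymbol API (the expressions are only constructed here; the error fires once a kernel containing the second one is type-checked):

import sympy as sp
from pystencils import TypedSymbol

i = TypedSymbol("i", "int64")
x = TypedSymbol("x", "float64")

ok = sp.Mod(i, 2)   # integer operands: collated type stays int64, accepted
bad = sp.Mod(x, 2)  # float operands: type checking raises
                    # "Integer functions or Modulo need to be used with integer types ..."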
import pytest
import sympy as sp

import pystencils as ps
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment


@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
@pytest.mark.parametrize('iteration_slice', [False, True])
def test_mod(target, iteration_slice):
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target)

    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
    cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)]

    field = dh.add_array("a", values_per_cell=1)
    eq_list = [SympyAssignment(field.center, 1.0)]

    if iteration_slice:
        iteration_slice = ps.make_slice[1:-1:2, 1:-1:2]
        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice)
        assign = eq_list
    else:
        assign = [Conditional(sp.And(*cond), Block(eq_list))]
        config = ps.CreateKernelConfig(target=dh.default_target)

    kernel = ps.create_kernel(assign, config=config).compile()

    dh.fill(field.name, 0, ghost_layers=True)

    if config.target == ps.enums.Target.GPU:
        dh.to_gpu(field.name)
    dh.run_kernel(kernel)
    if config.target == ps.enums.Target.GPU:
        dh.to_cpu(field.name)

    result = dh.gather_array(field.name, ghost_layers=True)

    for x in range(result.shape[0]):
        for y in range(result.shape[1]):
            if x % 2 == 1 and y % 2 == 1:
                assert result[x, y] == 1.0
            else:
                assert result[x, y] == 0.0
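Both parametrisations must produce the same checkerboard: the Conditional variant masks cells inside a full-domain kernel, while the slice variant restricts the index space itself and, on GPU, exercises the new Mod guard. As a usage note, the final nested-loop check is equivalent to a vectorised comparison against the expected pattern; a sketch reusing the result array gathered above:

import numpy as np

expected = np.zeros_like(result)   # result as gathered in the test above
expected[1::2, 1::2] = 1.0         # odd (x, y) cells carry 1.0, everything else 0.0
np.testing.assert_array_equal(result, expected)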