Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Commits on Source (4)
......@@ -124,12 +124,18 @@ class BlockIndexing(AbstractIndexing):
self._symbolic_shape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
self._compile_time_block_size = compile_time_block_size
@property
def cuda_indices(self):
    """Per-dimension index expressions of the form ``block_index * block_size + thread_index``.

    The block size factor is ``self._block_size`` when
    ``self._compile_time_block_size`` is truthy, otherwise ``BLOCK_DIM``.
    Only the first ``self._dim`` entries are returned.
    """
    if self._compile_time_block_size:
        extents = self._block_size
    else:
        extents = BLOCK_DIM

    per_axis = []
    for blk, extent, thr in zip(BLOCK_IDX, extents, THREAD_IDX):
        per_axis.append(blk * extent + thr)
    return per_axis[:self._dim]
@property
def coordinates(self):
    """Per-dimension coordinate expressions: ``cuda_indices`` plus the iteration-slice offsets.

    The offsets are whatever ``_get_start_from_slice`` returns for
    ``self._iterationSlice``; each one is added to the matching entry of
    ``self.cuda_indices``. Only the first ``self._dim`` entries are returned.
    """
    offsets = _get_start_from_slice(self._iterationSlice)
    # cuda_indices already holds block_index * block_size + thread_index, so
    # only the offset has to be added here; recomputing that product locally
    # would just be overwritten.
    coordinates = [c + off for c, off in zip(self.cuda_indices, offsets)]
    return coordinates[:self._dim]
......@@ -159,8 +165,13 @@ class BlockIndexing(AbstractIndexing):
def guard(self, kernel_content, arr_shape):
arr_shape = arr_shape[:self._dim]
conditions = [c < end
for c, end in zip(self.coordinates, _get_end_from_slice(self._iterationSlice, arr_shape))]
end = _get_end_from_slice(self._iterationSlice, arr_shape)
conditions = [c < e for c, e in zip(self.coordinates, end)]
for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
if iter_slice.step > 1:
conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
condition = conditions[0]
for c in conditions[1:]:
condition = sp.And(condition, c)
......
......@@ -34,7 +34,7 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written])
buffers = set([f for f in all_fields if FieldType.is_buffer(f) or FieldType.is_custom(f)])
buffers = set([f for f in all_fields if FieldType.is_buffer(f)])
fields_without_buffers = all_fields - buffers
field_accesses = set()
......
......@@ -161,9 +161,11 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
tuple of loop-node, ghost_layer_info
"""
# find correct ordering by inspecting participating FieldAccesses
absolut_accesses_only = False
field_accesses = body.atoms(Field.Access)
field_accesses = {e for e in field_accesses if not e.is_absolute_access}
if len(field_accesses) == 0: # when kernel contains only absolute accesses
absolut_accesses_only = True
# exclude accesses to buffers from field_list, because buffers are treated separately
field_list = [e.field for e in field_accesses if not (FieldType.is_buffer(e.field) or FieldType.is_custom(e.field))]
if len(field_list) == 0: # when kernel contains only custom fields
......@@ -174,14 +176,21 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
if loop_order is None:
loop_order = get_optimal_loop_ordering(fields)
shape = get_common_shape(fields)
if absolut_accesses_only:
absolut_access_fields = {e.field for e in body.atoms(Field.Access)}
shape = get_common_shape(absolut_access_fields)
else:
shape = get_common_shape(fields)
unify_shape_symbols(body, common_shape=shape, fields=fields)
if iteration_slice is not None:
iteration_slice = normalize_slice(iteration_slice, shape)
if ghost_layers is None:
required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
if absolut_accesses_only:
required_ghost_layers = 0
else:
required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
if isinstance(ghost_layers, int):
ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order)
......
......@@ -171,12 +171,13 @@ class TypeAdder:
args_types = [self.figure_out_type(a) for a in expr.args]
new_args = [a if t.dtype_eq(bool_type) else BooleanCastFunc(a, bool_type) for a, t in args_types]
return expr.func(*new_args), bool_type
elif type(expr, ) in pystencils.integer_functions.__dict__.values():
elif type(expr, ) in pystencils.integer_functions.__dict__.values() or isinstance(expr, sp.Mod):
args_types = [self.figure_out_type(a) for a in expr.args]
collated_type = collate_types([t for _, t in args_types])
# TODO: should we downcast to integer? If yes then which integer type?
if not collated_type.is_int():
raise ValueError(f"Integer functions need to be used with integer types but {collated_type} was given")
raise ValueError(f"Integer functions or Modulo need to be used with integer types "
f"but {collated_type} was given")
return expr, collated_type
elif isinstance(expr, flag_cond):
......
import pytest
import sympy as sp
import pystencils as ps
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
@pytest.mark.parametrize('iteration_slice', [False, True])
def test_mod(target, iteration_slice):
    """Check that a kernel writes 1.0 only to cells whose indices are odd in both dimensions.

    The odd-cell selection is expressed in one of two ways:

    * ``iteration_slice=True``: the kernel is created with the strided
      iteration slice ``[1:-1:2, 1:-1:2]``.
    * ``iteration_slice=False``: the assignment is wrapped in a ``Conditional``
      requiring ``Mod(loop_counter, 2) == 1`` for every dimension.

    The field is zero-filled (ghost layers included) before the kernel runs,
    and the gathered array (ghost layers included) is compared cell by cell.
    The GPU case is skipped when ``pycuda`` cannot be imported.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    # Use the parametrized target here: the kernel config below takes its
    # target from dh.default_target, so a hard-coded CPU target would make the
    # GPU parametrization silently run the CPU kernel again.
    dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target)

    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
    cond = [sp.Eq(sp.Mod(ctr, 2), 1) for ctr in loop_ctrs]

    field = dh.add_array("a", values_per_cell=1)
    eq_list = [SympyAssignment(field.center, 1.0)]

    if iteration_slice:
        # Separate name so the boolean test parameter is not rebound.
        strided_slice = ps.make_slice[1:-1:2, 1:-1:2]
        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=strided_slice)
        assign = eq_list
    else:
        assign = [Conditional(sp.And(*cond), Block(eq_list))]
        config = ps.CreateKernelConfig(target=dh.default_target)

    kernel = ps.create_kernel(assign, config=config).compile()

    dh.fill(field.name, 0, ghost_layers=True)

    if config.target == ps.Target.GPU:
        dh.to_gpu(field.name)

    dh.run_kernel(kernel)

    if config.target == ps.Target.GPU:
        dh.to_cpu(field.name)

    result = dh.gather_array(field.name, ghost_layers=True)

    for x in range(result.shape[0]):
        for y in range(result.shape[1]):
            if x % 2 == 1 and y % 2 == 1:
                assert result[x, y] == 1.0
            else:
                assert result[x, y] == 0.0