Commit 99d4b9fc authored by Martin Bauer

Merge branch 'staggered' into 'master'

Make create_staggered_kernel work with OpenMP

See merge request !123
parents b6c9f64c 4b618138
Pipeline #21023 passed with stages in 3 minutes and 31 seconds
@@ -24,6 +24,7 @@ def create_kernel(assignments,
                   cpu_openmp=False,
                   cpu_vectorize_info=None,
                   cpu_blocking=None,
+                  omp_single_loop=True,
                   gpu_indexing='block',
                   gpu_indexing_params=MappingProxyType({}),
                   use_textures_for_interpolation=True,
@@ -47,6 +48,7 @@ def create_kernel(assignments,
         skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                  periodicity kernel, that access the field outside the iteration bounds. Use with care!
         cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
+        omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted
         cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                             for documentation of these parameters see vectorize function. Example:
                             '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
@@ -99,7 +101,7 @@ def create_kernel(assignments,
         if cpu_blocking:
             omp_collapse = loop_blocking(ast, cpu_blocking)
         if cpu_openmp:
-            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
+            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse, assume_single_outer_loop=omp_single_loop)
         if cpu_vectorize_info:
             if cpu_vectorize_info is True:
                 vectorize(ast)
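
For context, a minimal sketch of how the new keyword is meant to be used through the public API (the fields and the copy assignment are illustrative and not part of this commit; only cpu_openmp and omp_single_loop are the parameters involved here):

    import pystencils as ps

    src, dst = ps.fields("src, dst: double[2D]")
    update = [ps.Assignment(dst.center, src.center)]

    # cpu_openmp enables OpenMP parallelization; omp_single_loop=True (the default)
    # keeps the previous behaviour of assuming a single outer loop, while False lets
    # add_openmp handle kernels that generate several outer loops.
    ast = ps.create_kernel(update, cpu_openmp=True, omp_single_loop=False)
    kernel = ast.compile()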
@@ -237,7 +239,7 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
     Returns:
         AST, see `create_kernel`
     """
 
-    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
+    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs and 'omp_single_loop' not in kwargs
     if isinstance(assignments, AssignmentCollection):
         subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
@@ -325,7 +327,7 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
 
     if target == 'cpu':
         from pystencils.cpu import create_kernel as create_kernel_cpu
-        ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
+        ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
     else:
         ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
     return ast
@@ -341,6 +343,6 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
     remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
     prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                              move_constants_before_loop]
-    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
+    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                         cpu_prepend_optimizations=prepend_optimizations, **kwargs)
     return ast
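
As the extended assert above indicates, omp_single_loop is reserved inside create_staggered_kernel: staggered kernels always hand omp_single_loop=False down to the CPU kernel creation, since they emit more than one outer loop. A rough sketch of the intended call pattern (here `flux` stands for a list of staggered flux assignments, as built in the test below):

    import pystencils as ps

    # Supported: request OpenMP only; the single-outer-loop assumption is
    # disabled internally for the staggered CPU kernel.
    kernel = ps.create_staggered_kernel(flux, target='cpu', cpu_openmp=True).compile()

    # Not supported: passing omp_single_loop explicitly now trips the assert above.
    # ps.create_staggered_kernel(flux, target='cpu', omp_single_loop=True)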
@@ -5,7 +5,7 @@ import pystencils as ps
 
 
 class TestStaggeredDiffusion:
-    def _run(self, num_neighbors, target='cpu'):
+    def _run(self, num_neighbors, target='cpu', openmp=False):
         L = (40, 40)
         D = 0.066
         dt = 1
@@ -33,8 +33,8 @@ class TestStaggeredDiffusion:
         flux += [ps.Assignment(j.staggered_access("SW"), xy_staggered),
                  ps.Assignment(j.staggered_access("NW"), xY_staggered)]
 
-        staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target).compile()
-        div_kernel = ps.create_kernel(update, target=dh.default_target).compile()
+        staggered_kernel = ps.create_staggered_kernel(flux, target=dh.default_target, cpu_openmp=openmp).compile()
+        div_kernel = ps.create_kernel(update, target=dh.default_target, cpu_openmp=openmp).compile()
 
         def time_loop(steps):
             sync = dh.synchronization_function([c.name])
@@ -74,6 +74,9 @@ class TestStaggeredDiffusion:
         import pystencils.opencl.autoinit
         self._run(4, 'opencl')
 
+    def test_diffusion_openmp(self):
+        self._run(4, openmp=True)
+
 
 def test_staggered_subexpressions():
     dh = ps.create_data_handling((10, 10), periodicity=True, default_target='cpu')
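
The new test path can also be exercised by hand; this is just a sketch that invokes the helper exactly as test_diffusion_openmp does (and since the openmp argument is forwarded unchanged to cpu_openmp, an integer thread count such as openmp=2 would work as well):

    # Runs the 4-neighbor diffusion setup once through the OpenMP code path.
    TestStaggeredDiffusion()._run(4, openmp=True)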