Skip to content
Snippets Groups Projects
Commit a274d662 authored by Martin Bauer
Browse files

Staggered Kernel: different option for GPU (one block for each code path)

parent 8e4aae93
Branches
Tags
No related merge requests found
...@@ -67,6 +67,7 @@ def make_python_function(kernel_function_node, argument_dict=None): ...@@ -67,6 +67,7 @@ def make_python_function(kernel_function_node, argument_dict=None):
cache[key] = (args, block_and_thread_numbers) cache[key] = (args, block_and_thread_numbers)
cache_values.append(kwargs) # keep objects alive such that ids remain unique cache_values.append(kwargs) # keep objects alive such that ids remain unique
func(*args, **block_and_thread_numbers) func(*args, **block_and_thread_numbers)
# import pycuda.driver as cuda
# cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
wrapper.ast = kernel_function_node wrapper.ast = kernel_function_node
wrapper.parameters = kernel_function_node.get_parameters() wrapper.parameters = kernel_function_node.get_parameters()
......
from types import MappingProxyType from types import MappingProxyType
import sympy as sp import sympy as sp
import itertools
from pystencils.assignment import Assignment from pystencils.assignment import Assignment
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
from pystencils.cpu.vectorization import vectorize from pystencils.cpu.vectorization import vectorize
...@@ -158,7 +159,8 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do ...@@ -158,7 +159,8 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu', **kwargs): def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
gpu_exclusive_conditions=False, **kwargs):
"""Kernel that updates a staggered field. """Kernel that updates a staggered field.
.. image:: /img/staggered_grid.svg .. image:: /img/staggered_grid.svg
...@@ -173,6 +175,7 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar ...@@ -173,6 +175,7 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
should be updated. should be updated.
subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
target: 'cpu' or 'gpu' target: 'cpu' or 'gpu'
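gpu_exclusive_conditions: only used when target is 'gpu'; if True, the updates are generated as a chain of mutually exclusive if/else conditions, so that each combination of staggered directions to update is handled by exactly one code block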
kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
Returns: Returns:
...@@ -191,18 +194,41 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar ...@@ -191,18 +194,41 @@ def create_staggered_kernel(staggered_field, expressions, subexpressions=(), tar
"same length." "same length."
final_assignments = [] final_assignments = []
for d in range(dim): last_conditional = None
cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
def add(condition, dimensions, as_else_block=False):
nonlocal last_conditional
if staggered_field.index_dimensions == 1: if staggered_field.index_dimensions == 1:
assignments = [Assignment(staggered_field(d), expressions[d])] assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
a_coll = AssignmentCollection(assignments, list(subexpressions)).new_filtered([staggered_field(d)]) a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
elif staggered_field.index_dimensions == 2: elif staggered_field.index_dimensions == 2:
assert staggered_field.has_fixed_index_shape assert staggered_field.has_fixed_index_shape
assignments = [Assignment(staggered_field(d, i), expr) for i, expr in enumerate(expressions[d])] assignments = [Assignment(staggered_field(d, i), expr)
for d in dimensions
for i, expr in enumerate(expressions[d])]
a_coll = AssignmentCollection(assignments, list(subexpressions)) a_coll = AssignmentCollection(assignments, list(subexpressions))
a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])]) a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
for d in dimensions])
sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments] sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
final_assignments.append(Conditional(cond, Block(sp_assignments))) if as_else_block and last_conditional:
last_conditional.false_block = Conditional(condition, Block(sp_assignments))
last_conditional = last_conditional.false_block
else:
last_conditional = Conditional(condition, Block(sp_assignments))
final_assignments.append(last_conditional)
if target == 'cpu' or not gpu_exclusive_conditions:
for d in range(dim):
cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
add(cond, [d])
elif target == 'gpu':
full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
for include in itertools.product(*[[1, 0]] * dim):
case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
dimensions_to_include = [i for i in range(dim) if include[i]]
if dimensions_to_include:
add(case_conditions, dimensions_to_include, True)
ghost_layers = [(1, 0)] * dim ghost_layers = [(1, 0)] * dim
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment