From ca72adf4f8d975a1e81e6681e32674bde0e2ca74 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Fri, 19 Oct 2018 08:47:50 +0200 Subject: [PATCH] walberla integration + bugfix in GPU block indexing --- backends/cbackend.py | 14 +++++++------- data_types.py | 2 +- gpucuda/indexing.py | 22 ++++++++++++++-------- integer_functions.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 51 insertions(+), 16 deletions(-) diff --git a/backends/cbackend.py b/backends/cbackend.py index 45c31142c..54be489c5 100644 --- a/backends/cbackend.py +++ b/backends/cbackend.py @@ -2,7 +2,7 @@ import sympy as sp from collections import namedtuple from sympy.core import S from typing import Set - +from sympy.printing.ccode import C89CodePrinter try: from sympy.printing.ccode import C99CodePrinter as CCodePrinter except ImportError: @@ -233,7 +233,7 @@ class CustomSympyPrinter(CCodePrinter): return result.replace("\n", "") def _print_Function(self, expr): - function_map = { + infix_functions = { bitwise_xor: '^', bit_shift_right: '>>', bit_shift_left: '<<', @@ -248,11 +248,8 @@ class CustomSympyPrinter(CCodePrinter): return self._typed_number(arg, data_type) else: return "*((%s)(& %s))" % (PointerType(data_type), self._print(arg)) - elif expr.func == modulo_floor: - assert all(get_type_of_expression(e).is_int() for e in expr.args) - return "({dtype})({0} / {1}) * {1}".format(*expr.args, dtype=get_type_of_expression(expr.args[0])) - elif expr.func in function_map: - return "(%s %s %s)" % (self._print(expr.args[0]), function_map[expr.func], self._print(expr.args[1])) + elif expr.func in infix_functions: + return "(%s %s %s)" % (self._print(expr.args[0]), infix_functions[expr.func], self._print(expr.args[1])) else: return super(CustomSympyPrinter, self)._print_Function(expr) @@ -268,6 +265,9 @@ class CustomSympyPrinter(CCodePrinter): else: return res + _print_Max = C89CodePrinter._print_Max + _print_Min = C89CodePrinter._print_Min + # noinspection PyPep8Naming class VectorizedCustomSympyPrinter(CustomSympyPrinter): diff --git a/data_types.py b/data_types.py index 63d2ae560..a992d3b10 100644 --- a/data_types.py +++ b/data_types.py @@ -300,7 +300,7 @@ def get_type_of_expression(expr): elif isinstance(expr, TypedSymbol): return expr.dtype elif isinstance(expr, sp.Symbol): - raise ValueError("All symbols inside this expression have to be typed!") + raise ValueError("All symbols inside this expression have to be typed! ", str(expr)) elif isinstance(expr, cast_func): return expr.args[1] elif hasattr(expr, 'func') and expr.func == sp.Piecewise: diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index c85c411e4..979083c7e 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -2,6 +2,7 @@ import abc from typing import Tuple # noqa import sympy as sp from pystencils.astnodes import Conditional, Block +from pystencils.integer_functions import div_ceil from pystencils.slicing import normalize_slice from pystencils.data_types import TypedSymbol, create_type from functools import partial @@ -70,10 +71,10 @@ class BlockIndexing(AbstractIndexing): iteration_slice: slice that defines rectangular subarea which is iterated over permute_block_size_dependent_on_layout: if True the block_size is permuted such that the fastest coordinate gets the largest amount of threads + compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used """ - def __init__(self, field, iteration_slice=None, - block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True): + block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False): if field.spatial_dimensions > 3: raise NotImplementedError("This indexing scheme supports at most 3 spatial dimensions") @@ -83,16 +84,18 @@ class BlockIndexing(AbstractIndexing): if AUTO_BLOCK_SIZE_LIMITING: block_size = self.limit_block_size_to_device_maximum(block_size) - self._blockSize = block_size + self._block_size = block_size self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape) self._dim = field.spatial_dimensions self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape] + self._compile_time_block_size = compile_time_block_size @property def coordinates(self): offsets = _get_start_from_slice(self._iterationSlice) + block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM coordinates = [block_index * bs + thread_idx + off - for block_index, bs, thread_idx, off in zip(BLOCK_IDX, self._blockSize, THREAD_IDX, offsets)] + for block_index, bs, thread_idx, off in zip(BLOCK_IDX, block_size, THREAD_IDX, offsets)] return coordinates[:self._dim] @@ -102,13 +105,16 @@ class BlockIndexing(AbstractIndexing): widths = [end - start for start, end in zip(_get_start_from_slice(self._iterationSlice), _get_end_from_slice(self._iterationSlice, arr_shape))] widths = sp.Matrix(widths).subs(substitution_dict) + extend_bs = (1,) * (3 - len(self._block_size)) + block_size = self._block_size + extend_bs + if not self._compile_time_block_size: + block_size = [sp.Min(bs, shape) for bs, shape in zip(block_size, widths)] - grid = tuple(sp.ceiling(length / block_size) - for length, block_size in zip(widths, self._blockSize)) # type: : Tuple[int, ...] - extend_bs = (1,) * (3 - len(self._blockSize)) + grid = tuple(div_ceil(length, block_size) + for length, block_size in zip(widths, block_size)) extend_gr = (1,) * (3 - len(grid)) - return {'block': self._blockSize + extend_bs, + return {'block': block_size, 'grid': grid + extend_gr} def guard(self, kernel_content, arr_shape): diff --git a/integer_functions.py b/integer_functions.py index 16fa97a8c..9369f236c 100644 --- a/integer_functions.py +++ b/integer_functions.py @@ -70,3 +70,32 @@ class modulo_ceil(sp.Function): assert dtype.is_int() code = "(({0}) % ({1}) == 0 ? {0} : (({dtype})(({0}) / ({1}))+1) * ({1}))" return code.format(print_func(self.args[0]), print_func(self.args[1]), dtype=dtype) + + +# noinspection PyPep8Naming +class div_ceil(sp.Function): + """Integer division that is always rounded up + + Examples: + >>> div_ceil(9, 4) + 3 + >>> div_ceil(8, 4) + 2 + >>> from pystencils import TypedSymbol + >>> a, b = TypedSymbol("a", "int64"), TypedSymbol("b", "int32") + >>> div_ceil(a, b).to_c(str) + '( (a) % (b) == 0 ? (int64_t)(a) / (int64_t)(b) : ( (int64_t)(a) / (int64_t)(b) ) +1 )' + """ + nargs = 2 + + def __new__(cls, integer, divisor): + if is_integer_sequence((integer, divisor)): + return integer // divisor if integer % divisor == 0 else (integer // divisor) + 1 + else: + return super().__new__(cls, integer, divisor) + + def to_c(self, print_func): + dtype = collate_types((get_type_of_expression(self.args[0]), get_type_of_expression(self.args[1]))) + assert dtype.is_int() + code = "( ({0}) % ({1}) == 0 ? ({dtype})({0}) / ({dtype})({1}) : ( ({dtype})({0}) / ({dtype})({1}) ) +1 )" + return code.format(print_func(self.args[0]), print_func(self.args[1]), dtype=dtype) -- GitLab