From f756b4d4aa583da818d8f4930019141afce6332b Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Tue, 19 Mar 2024 11:04:05 +0100 Subject: [PATCH] First GPU Platform support --- .../backend/ast/logical_expressions.py | 94 +++++++++++++++++++ src/pystencils/backend/platforms/__init__.py | 2 + .../backend/platforms/generic_gpu.py | 64 +++++++++++++ src/pystencils/sympyextensions/typed_sympy.py | 6 +- .../kernelcreation/platform/test_basic_gpu.py | 28 ++++++ 5 files changed, 191 insertions(+), 3 deletions(-) create mode 100644 src/pystencils/backend/ast/logical_expressions.py create mode 100644 src/pystencils/backend/platforms/generic_gpu.py create mode 100644 tests/nbackend/kernelcreation/platform/test_basic_gpu.py diff --git a/src/pystencils/backend/ast/logical_expressions.py b/src/pystencils/backend/ast/logical_expressions.py new file mode 100644 index 000000000..49fbf68f0 --- /dev/null +++ b/src/pystencils/backend/ast/logical_expressions.py @@ -0,0 +1,94 @@ +from typing import Callable, Any +import operator + +from .expressions import PsExpression +from .astnode import PsAstNode +from .util import failing_cast + + +class PsLogicalExpression(PsExpression): + __match_args__ = ("operand1", "operand2") + + def __init__(self, op1: PsExpression, op2: PsExpression): + self._op1 = op1 + self._op2 = op2 + + @property + def operand1(self) -> PsExpression: + return self._op1 + + @operand1.setter + def operand1(self, expr: PsExpression): + self._op1 = expr + + @property + def operand2(self) -> PsExpression: + return self._op2 + + @operand2.setter + def operand2(self, expr: PsExpression): + self._op2 = expr + + def clone(self): + return type(self)(self._op1.clone(), self._op2.clone()) + + def get_children(self) -> tuple[PsAstNode, ...]: + return self._op1, self._op2 + + def set_child(self, idx: int, c: PsAstNode): + idx = [0, 1][idx] + match idx: + case 0: + self._op1 = failing_cast(PsExpression, c) + case 1: + self._op2 = failing_cast(PsExpression, c) + + def __repr__(self) -> str: + opname = self.__class__.__name__ + return f"{opname}({repr(self._op1)}, {repr(self._op2)})" + + @property + def python_operator(self) -> None | Callable[[Any, Any], Any]: + return None + + +class PsAnd(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.and_ + + +class PsEq(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.eq + + +class PsGe(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.ge + + +class PsGt(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.gt + + +class PsLe(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.le + + +class PsLt(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.lt + + +class PsNe(PsLogicalExpression): + @property + def python_operator(self) -> Callable[[Any, Any], Any] | None: + return operator.ne diff --git a/src/pystencils/backend/platforms/__init__.py b/src/pystencils/backend/platforms/__init__.py index 61db873d2..355c28d8f 100644 --- a/src/pystencils/backend/platforms/__init__.py +++ b/src/pystencils/backend/platforms/__init__.py @@ -1,5 +1,6 @@ from .platform import Platform from .generic_cpu import GenericCpu, GenericVectorCpu +from .generic_gpu import GenericGpu from .x86 import X86VectorCpu, X86VectorArch __all__ = [ @@ -8,4 +9,5 @@ __all__ = [ "GenericVectorCpu", "X86VectorCpu", "X86VectorArch", + "GenericGpu" ] diff --git a/src/pystencils/backend/platforms/generic_gpu.py b/src/pystencils/backend/platforms/generic_gpu.py new file mode 100644 index 000000000..1e7d958f7 --- /dev/null +++ b/src/pystencils/backend/platforms/generic_gpu.py @@ -0,0 +1,64 @@ +from .platform import Platform + +from ..kernelcreation.iteration_space import ( + IterationSpace, + FullIterationSpace, + SparseIterationSpace, +) + +from ..ast.structural import PsBlock, PsConditional +from ..ast.expressions import ( + PsSymbolExpr, + PsAdd, +) +from ..ast.logical_expressions import PsLt, PsAnd +from ...types import PsSignedIntegerType +from ..symbols import PsSymbol + +int32 = PsSignedIntegerType(width=32, const=False) + +BLOCK_IDX = [PsSymbolExpr(PsSymbol(f"blockIdx.{coord}", int32)) for coord in ('x', 'y', 'z')] +THREAD_IDX = [PsSymbolExpr(PsSymbol(f"threadIdx.{coord}", int32)) for coord in ('x', 'y', 'z')] +BLOCK_DIM = [PsSymbolExpr(PsSymbol(f"blockDim.{coord}", int32)) for coord in ('x', 'y', 'z')] +GRID_DIM = [PsSymbolExpr(PsSymbol(f"gridDim.{coord}", int32)) for coord in ('x', 'y', 'z')] + + +class GenericGpu(Platform): + + @property + def required_headers(self) -> set[str]: + return {"gpu_defines.h"} + + def materialize_iteration_space(self, body: PsBlock, ispace: IterationSpace) -> PsBlock: + if isinstance(ispace, FullIterationSpace): + return self._guard_full_iteration_space(body, ispace) + else: + assert False, "unreachable code" + + def cuda_indices(self, dim): + block_size = BLOCK_DIM + indices = [block_index * bs + thread_idx + for block_index, bs, thread_idx in zip(BLOCK_IDX, block_size, THREAD_IDX)] + + return indices[:dim] + + # Internals + def _guard_full_iteration_space(self, body: PsBlock, ispace: FullIterationSpace) -> PsBlock: + + dimensions = ispace.dimensions + + # Determine loop order by permuting dimensions + archetype_field = ispace.archetype_field + if archetype_field is not None: + loop_order = archetype_field.layout + dimensions = [dimensions[coordinate] for coordinate in loop_order] + + start = [PsAdd(c, d.start) for c, d in zip(self.cuda_indices(len(dimensions)), dimensions[::-1])] + conditions = [PsLt(c, d.stop) for c, d in zip(start, dimensions[::-1])] + + condition = conditions[0] + for c in conditions[1:]: + condition = PsAnd(condition, c) + + return PsBlock([PsConditional(condition, body)]) + diff --git a/src/pystencils/sympyextensions/typed_sympy.py b/src/pystencils/sympyextensions/typed_sympy.py index 949aa6991..541f9aed7 100644 --- a/src/pystencils/sympyextensions/typed_sympy.py +++ b/src/pystencils/sympyextensions/typed_sympy.py @@ -151,16 +151,16 @@ class FieldShapeSymbol(TypedSymbol): return obj def __getnewargs__(self): - return self.field_names, self.coordinate + return self.field_name, self.coordinate def __getnewargs_ex__(self): - return (self.field_names, self.coordinate), {} + return (self.field_name, self.coordinate), {} __xnew__ = staticmethod(__new_stage2__) __xnew_cached_ = staticmethod(sp.core.cacheit(__new_stage2__)) def _hashable_content(self): - return super()._hashable_content(), self.coordinate, self.field_names + return super()._hashable_content(), self.coordinate, self.field_name class FieldPointerSymbol(TypedSymbol): diff --git a/tests/nbackend/kernelcreation/platform/test_basic_gpu.py b/tests/nbackend/kernelcreation/platform/test_basic_gpu.py new file mode 100644 index 000000000..df0e48fb0 --- /dev/null +++ b/tests/nbackend/kernelcreation/platform/test_basic_gpu.py @@ -0,0 +1,28 @@ +import pytest + +from pystencils.field import Field + +from pystencils.backend.kernelcreation import ( + KernelCreationContext, + FullIterationSpace +) + +from pystencils.backend.ast.structural import PsBlock, PsLoop, PsComment +from pystencils.backend.ast.expressions import PsExpression +from pystencils.backend.ast import dfs_preorder + +from pystencils.backend.platforms import GenericGpu + + +@pytest.mark.parametrize("layout", ["fzyx", "zyxf", "c", "f"]) +def test_loop_nest(layout): + ctx = KernelCreationContext() + + body = PsBlock([PsComment("Loop body goes here")]) + platform = GenericGpu(ctx) + + # FZYX Order + archetype_field = Field.create_generic("fzyx_field", spatial_dimensions=3, layout=layout) + ispace = FullIterationSpace.create_with_ghost_layers(ctx, archetype_field, 0) + + condition = platform.materialize_iteration_space(body, ispace) -- GitLab