Skip to content
Snippets Groups Projects
Commit f756b4d4 authored by Markus Holzer's avatar Markus Holzer
Browse files

First GPU Platform support

parent 6336f73d
Branches
Tags
No related merge requests found
Pipeline #64558 failed with stages
in 3 minutes and 55 seconds
from typing import Callable, Any
import operator
from .expressions import PsExpression
from .astnode import PsAstNode
from .util import failing_cast
class PsLogicalExpression(PsExpression):
    """Common base for AST nodes that combine two operand expressions.

    The two operands are exposed as ``operand1`` and ``operand2`` (also
    usable for structural pattern matching via ``__match_args__``) and are
    reported as the node's children, in that order.
    """

    __match_args__ = ("operand1", "operand2")

    def __init__(self, op1: PsExpression, op2: PsExpression):
        self._op1 = op1
        self._op2 = op2

    @property
    def operand1(self) -> PsExpression:
        """First (left-hand) operand."""
        return self._op1

    @operand1.setter
    def operand1(self, expr: PsExpression):
        self._op1 = expr

    @property
    def operand2(self) -> PsExpression:
        """Second (right-hand) operand."""
        return self._op2

    @operand2.setter
    def operand2(self, expr: PsExpression):
        self._op2 = expr

    def clone(self):
        """Return a new node of the same concrete class with cloned operands."""
        lhs_copy = self._op1.clone()
        rhs_copy = self._op2.clone()
        return type(self)(lhs_copy, rhs_copy)

    def get_children(self) -> tuple[PsAstNode, ...]:
        """Return both operands as a tuple ``(operand1, operand2)``."""
        return (self._op1, self._op2)

    def set_child(self, idx: int, c: PsAstNode):
        """Replace the operand at position ``idx`` with ``c``.

        ``c`` is passed through ``failing_cast(PsExpression, c)`` before
        being stored.
        """
        # Indexing a two-element list maps negative indices onto 0/1 and
        # raises IndexError for anything out of range.
        slot = [0, 1][idx]
        replacement = failing_cast(PsExpression, c)
        if slot == 0:
            self._op1 = replacement
        else:
            self._op2 = replacement

    def __repr__(self) -> str:
        opname = type(self).__name__
        return f"{opname}({repr(self._op1)}, {repr(self._op2)})"

    @property
    def python_operator(self) -> None | Callable[[Any, Any], Any]:
        """Python callable equivalent to this node; the base class has none."""
        return None
class PsAnd(PsLogicalExpression):
    """Binary "and" node; its Python counterpart is ``operator.and_``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.and_``."""
        return operator.and_
class PsEq(PsLogicalExpression):
    """Equality comparison node; its Python counterpart is ``operator.eq``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.eq``."""
        return operator.eq
class PsGe(PsLogicalExpression):
    """Greater-or-equal comparison node; its Python counterpart is ``operator.ge``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.ge``."""
        return operator.ge
class PsGt(PsLogicalExpression):
    """Greater-than comparison node; its Python counterpart is ``operator.gt``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.gt``."""
        return operator.gt
class PsLe(PsLogicalExpression):
    """Less-or-equal comparison node; its Python counterpart is ``operator.le``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.le``."""
        return operator.le
class PsLt(PsLogicalExpression):
    """Less-than comparison node; its Python counterpart is ``operator.lt``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.lt``."""
        return operator.lt
class PsNe(PsLogicalExpression):
    """Inequality comparison node; its Python counterpart is ``operator.ne``."""

    @property
    def python_operator(self) -> Callable[[Any, Any], Any] | None:
        """Return ``operator.ne``."""
        return operator.ne
from .platform import Platform from .platform import Platform
from .generic_cpu import GenericCpu, GenericVectorCpu from .generic_cpu import GenericCpu, GenericVectorCpu
from .generic_gpu import GenericGpu
from .x86 import X86VectorCpu, X86VectorArch from .x86 import X86VectorCpu, X86VectorArch
__all__ = [ __all__ = [
...@@ -8,4 +9,5 @@ __all__ = [ ...@@ -8,4 +9,5 @@ __all__ = [
"GenericVectorCpu", "GenericVectorCpu",
"X86VectorCpu", "X86VectorCpu",
"X86VectorArch", "X86VectorArch",
"GenericGpu"
] ]
from .platform import Platform
from ..kernelcreation.iteration_space import (
IterationSpace,
FullIterationSpace,
SparseIterationSpace,
)
from ..ast.structural import PsBlock, PsConditional
from ..ast.expressions import (
PsSymbolExpr,
PsAdd,
)
from ..ast.logical_expressions import PsLt, PsAnd
from ...types import PsSignedIntegerType
from ..symbols import PsSymbol
# Non-const signed 32-bit integer type shared by all index symbols below.
int32 = PsSignedIntegerType(width=32, const=False)

# One symbol expression per coordinate (x, y, z), named `blockIdx.x`,
# `threadIdx.x`, `blockDim.x`, `gridDim.x`, and so on.
BLOCK_IDX = [
    PsSymbolExpr(PsSymbol(f"blockIdx.{coord}", int32))
    for coord in "xyz"
]
THREAD_IDX = [
    PsSymbolExpr(PsSymbol(f"threadIdx.{coord}", int32))
    for coord in "xyz"
]
BLOCK_DIM = [
    PsSymbolExpr(PsSymbol(f"blockDim.{coord}", int32))
    for coord in "xyz"
]
GRID_DIM = [
    PsSymbolExpr(PsSymbol(f"gridDim.{coord}", int32))
    for coord in "xyz"
]
class GenericGpu(Platform):
    """Platform that guards a kernel body with a per-thread bounds check.

    Instead of emitting loops, a full iteration space is materialized as a
    single conditional: the body runs only if the index derived from
    ``blockIdx``, ``blockDim`` and ``threadIdx`` (offset by each dimension's
    start) lies below each dimension's stop.
    """

    @property
    def required_headers(self) -> set[str]:
        """Headers that generated code for this platform must include."""
        return {"gpu_defines.h"}

    def materialize_iteration_space(self, body: PsBlock, ispace: IterationSpace) -> PsBlock:
        """Wrap ``body`` so that it is executed over ``ispace``.

        Args:
            body: Kernel body to be guarded.
            ispace: Iteration space; only ``FullIterationSpace`` is supported.

        Returns:
            A block containing the guarded body.

        Raises:
            NotImplementedError: If ``ispace`` is not a ``FullIterationSpace``.
        """
        if isinstance(ispace, FullIterationSpace):
            return self._guard_full_iteration_space(body, ispace)

        # This branch is reachable (e.g. for sparse iteration spaces), so it
        # must not be an `assert`: asserts vanish under `python -O`, which
        # would make this method silently return None.
        raise NotImplementedError(
            f"GenericGpu cannot materialize iteration spaces of type "
            f"{type(ispace).__name__}; only FullIterationSpace is supported"
        )

    def cuda_indices(self, dim: int):
        """Return the first ``dim`` linear thread-index expressions.

        Entry ``i`` is ``blockIdx[i] * blockDim[i] + threadIdx[i]`` for the
        coordinates x, y, z in that order. At most three expressions are
        available; a larger ``dim`` still yields only three.
        """
        indices = [
            block_index * block_extent + thread_index
            for block_index, block_extent, thread_index
            in zip(BLOCK_IDX, BLOCK_DIM, THREAD_IDX)
        ]
        return indices[:dim]

    # Internals

    def _guard_full_iteration_space(self, body: PsBlock, ispace: FullIterationSpace) -> PsBlock:
        """Build ``if (idx_0 < stop_0 && idx_1 < stop_1 ...) body``.

        Requires at least one dimension (the first condition seeds the
        conjunction).
        """
        dimensions = ispace.dimensions

        # Determine loop order by permuting dimensions
        archetype_field = ispace.archetype_field
        if archetype_field is not None:
            loop_order = archetype_field.layout
            dimensions = [dimensions[coordinate] for coordinate in loop_order]

        # The x index is paired with the LAST dimension in loop order, y with
        # the one before it, and so on.
        reversed_dims = dimensions[::-1]

        # NOTE(review): these index expressions are only used inside the guard;
        # nothing in this method binds them to the iteration space's counters.
        # Confirm that this is handled elsewhere.
        start = [
            PsAdd(index, dimension.start)
            for index, dimension in zip(self.cuda_indices(len(dimensions)), reversed_dims)
        ]
        conditions = [
            PsLt(index, dimension.stop)
            for index, dimension in zip(start, reversed_dims)
        ]

        # Fold the per-dimension checks into one left-nested conjunction.
        condition = conditions[0]
        for extra in conditions[1:]:
            condition = PsAnd(condition, extra)

        return PsBlock([PsConditional(condition, body)])
...@@ -151,16 +151,16 @@ class FieldShapeSymbol(TypedSymbol): ...@@ -151,16 +151,16 @@ class FieldShapeSymbol(TypedSymbol):
return obj return obj
def __getnewargs__(self): def __getnewargs__(self):
return self.field_names, self.coordinate return self.field_name, self.coordinate
def __getnewargs_ex__(self): def __getnewargs_ex__(self):
return (self.field_names, self.coordinate), {} return (self.field_name, self.coordinate), {}
__xnew__ = staticmethod(__new_stage2__) __xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(sp.core.cacheit(__new_stage2__)) __xnew_cached_ = staticmethod(sp.core.cacheit(__new_stage2__))
def _hashable_content(self): def _hashable_content(self):
return super()._hashable_content(), self.coordinate, self.field_names return super()._hashable_content(), self.coordinate, self.field_name
class FieldPointerSymbol(TypedSymbol): class FieldPointerSymbol(TypedSymbol):
......
import pytest
from pystencils.field import Field
from pystencils.backend.kernelcreation import (
KernelCreationContext,
FullIterationSpace
)
from pystencils.backend.ast.structural import PsBlock, PsLoop, PsComment
from pystencils.backend.ast.expressions import PsExpression
from pystencils.backend.ast import dfs_preorder
from pystencils.backend.platforms import GenericGpu
@pytest.mark.parametrize("layout", ["fzyx", "zyxf", "c", "f"])
def test_loop_nest(layout):
    """Smoke test: materialize a full 3D iteration space on ``GenericGpu``.

    Runs once per parametrized field layout; it only checks that
    materialization completes without raising.
    """
    ctx = KernelCreationContext()
    body = PsBlock([PsComment("Loop body goes here")])
    platform = GenericGpu(ctx)

    # Generic 3D field using the layout under test (the field keeps the
    # name "fzyx_field" regardless of the layout).
    field = Field.create_generic(
        "fzyx_field",
        spatial_dimensions=3,
        layout=layout,
    )
    ispace = FullIterationSpace.create_with_ghost_layers(ctx, field, 0)

    guarded = platform.materialize_iteration_space(body, ispace)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment