diff --git a/conftest.py b/conftest.py index 742ff7caa8a1825d1073d8342ce5cb1a70b5de46..d4b323f73f88a8ac7ef3a033047f8792157e7b36 100644 --- a/conftest.py +++ b/conftest.py @@ -48,7 +48,7 @@ add_path_to_ignore('_local_tmp') try: import cupy except ImportError: - collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/test_gpu.py")] + collect_ignore += [os.path.join(SCRIPT_FOLDER, "tests/nbackend/kernelcreation/test_gpu.py")] add_path_to_ignore('src/pystencils/gpu') try: diff --git a/src/pystencils/backend/jit/__init__.py b/src/pystencils/backend/jit/__init__.py index edc755dfd18e7ef27bcef04e34e44222752e93f6..f382f074992c4197dc3f92b4f2d2ad9e063851dc 100644 --- a/src/pystencils/backend/jit/__init__.py +++ b/src/pystencils/backend/jit/__init__.py @@ -26,7 +26,8 @@ Both are available here through `LegacyCpuJit` and `LegacyGpuJit`. """ -from .jit import JitBase, NoJit, KernelWrapper, LegacyCpuJit, LegacyGpuJit +from .jit import JitBase, NoJit, KernelWrapper +from .legacy_cpu import LegacyCpuJit from .gpu_cupy import CupyJit no_jit = NoJit() @@ -38,6 +39,5 @@ __all__ = [ "LegacyCpuJit", "NoJit", "no_jit", - "LegacyGpuJit", "CupyJit", ] diff --git a/src/pystencils/backend/jit/gpu_cupy.py b/src/pystencils/backend/jit/gpu_cupy.py index 0c9b7b8a9ee199401cac05247fdb4627f9ae256e..d6aaac2d2ee26901b73ae9d2b6daaba06c055bd0 100644 --- a/src/pystencils/backend/jit/gpu_cupy.py +++ b/src/pystencils/backend/jit/gpu_cupy.py @@ -1,4 +1,4 @@ -from typing import Callable, Any +from typing import Any, Sequence, cast from dataclasses import dataclass try: @@ -22,6 +22,7 @@ from ..kernelfunction import ( KernelParameter, ) from ..emission import emit_code +from ...types import PsStructType from ...include import get_pystencils_include_path @@ -73,14 +74,7 @@ class CupyKernelWrapper(KernelWrapper): return devices.pop() def _get_cached_args(self, **kwargs): - key = tuple( - ( - (k, v.dtype, v.strides, v.shape) - if isinstance(v, cp.ndarray) - else (k, id(v)) - ) - for k, v in kwargs.items() - ) + key = (self._block_size,) + tuple((k, id(v)) for k, v in kwargs.items()) if key not in self._args_cache: args = self._get_args(**kwargs) @@ -109,6 +103,10 @@ class CupyKernelWrapper(KernelWrapper): if field.has_fixed_shape: expected_shape = tuple(int(s) for s in field.shape) + if isinstance(field.dtype, PsStructType): + assert expected_shape[-1] == 1 + expected_shape = expected_shape[:-1] + actual_shape = arr.shape if expected_shape != actual_shape: raise ValueError( @@ -117,6 +115,10 @@ class CupyKernelWrapper(KernelWrapper): ) expected_strides = tuple(int(s) for s in field.strides) + if isinstance(field.dtype, PsStructType): + assert expected_strides[-1] == 1 + expected_strides = expected_strides[:-1] + actual_strides = tuple(s // arr.dtype.itemsize for s in arr.strides) if expected_strides != actual_strides: raise ValueError( @@ -126,7 +128,7 @@ class CupyKernelWrapper(KernelWrapper): match field.field_type: case FieldType.GENERIC: - field_shapes.add(arr.shape) + field_shapes.add(arr.shape[: field.spatial_dimensions]) if len(field_shapes) > 1: raise ValueError( @@ -200,11 +202,20 @@ class CupyKernelWrapper(KernelWrapper): class CupyJit(JitBase): - def __init__(self, default_block_size: tuple[int, int, int] = (128, 2, 1)): + def __init__(self, default_block_size: Sequence[int] = (128, 2, 1)): self._runtime_headers = {"<cstdint>"} - self._default_block_size = default_block_size - def compile(self, kfunc: KernelFunction) -> Callable[..., None]: + if len(default_block_size) > 3: + raise ValueError( + f"Invalid block size: 
{default_block_size}. Must be at most three-dimensional." + ) + + self._default_block_size: tuple[int, int, int] = cast( + tuple[int, int, int], + tuple(default_block_size) + (1,) * (3 - len(default_block_size)), + ) + + def compile(self, kfunc: KernelFunction) -> KernelWrapper: if not HAVE_CUPY: raise JitError( "`cupy` is not installed: just-in-time-compilation of CUDA kernels is unavailable." diff --git a/src/pystencils/backend/jit/jit.py b/src/pystencils/backend/jit/jit.py index b455c368054c26f7ed6b86893bcb9dca8b877daf..2184245707e861052dce61e7f8f12b720fa37922 100644 --- a/src/pystencils/backend/jit/jit.py +++ b/src/pystencils/backend/jit/jit.py @@ -1,60 +1,61 @@ from __future__ import annotations -from typing import Callable, TYPE_CHECKING +from typing import Sequence, TYPE_CHECKING from abc import ABC, abstractmethod if TYPE_CHECKING: - from ..kernelfunction import KernelFunction + from ..kernelfunction import KernelFunction, KernelParameter + from ...enums import Target class JitError(Exception): """Indicates an error during just-in-time compilation""" -class JitBase(ABC): - """Base class for just-in-time compilation interfaces implemented in pystencils.""" +class KernelWrapper(ABC): + """Wrapper around a compiled and executable pystencils kernel.""" - @abstractmethod - def compile(self, kernel: KernelFunction) -> Callable[..., None]: - """Compile a kernel function and return a callable object which invokes the kernel.""" - - -class KernelWrapper: def __init__(self, kfunc: KernelFunction) -> None: self._kfunc = kfunc + @abstractmethod + def __call__(self, **kwargs) -> None: + pass + @property def kernel_function(self) -> KernelFunction: return self._kfunc + + @property + def ast(self) -> KernelFunction: + return self._kfunc + + @property + def target(self) -> Target: + return self._kfunc.target + + @property + def parameters(self) -> Sequence[KernelParameter]: + return self._kfunc.parameters @property def code(self) -> str: from pystencils.display_utils import get_code_str - return get_code_str(str) + return get_code_str(self._kfunc) + + +class JitBase(ABC): + """Base class for just-in-time compilation interfaces implemented in pystencils.""" + + @abstractmethod + def compile(self, kernel: KernelFunction) -> KernelWrapper: + """Compile a kernel function and return a callable object which invokes the kernel.""" class NoJit(JitBase): """Not a JIT compiler: Used to explicitly disable JIT compilation on an AST.""" - def compile(self, kernel: KernelFunction) -> Callable[..., None]: + def compile(self, kernel: KernelFunction) -> KernelWrapper: raise JitError( "Just-in-time compilation of this kernel was explicitly disabled." ) - - -class LegacyCpuJit(JitBase): - """Wrapper around ``pystencils.cpu.cpujit``""" - - def compile(self, kernel: KernelFunction) -> Callable[..., None]: - from .legacy_cpu import compile_and_load - - return compile_and_load(kernel) - - -class LegacyGpuJit(JitBase): - """Wrapper around ``pystencils.gpu.gpujit``""" - - def compile(self, kernel: KernelFunction) -> Callable[..., None]: - from ...old.gpu.gpujit import make_python_function - - return make_python_function(kernel) diff --git a/src/pystencils/backend/jit/legacy_cpu.py b/src/pystencils/backend/jit/legacy_cpu.py index ca57011605437f292ddcd04eac9a4e97682deb22..1acd1b22ad48ac0564d255314bd6603405421fdc 100644 --- a/src/pystencils/backend/jit/legacy_cpu.py +++ b/src/pystencils/backend/jit/legacy_cpu.py @@ -47,6 +47,8 @@ Then 'cl.exe' is used to compile. 
from appdirs import user_cache_dir, user_config_dir from collections import OrderedDict +from typing import Callable + import importlib.util import json import os @@ -60,14 +62,34 @@ import warnings from ..kernelfunction import KernelFunction +from .jit import JitBase, KernelWrapper from .cpu_extension_module import PsKernelExtensioNModule from .msvc_detection import get_environment from pystencils.include import get_pystencils_include_path -from pystencils.kernel_wrapper import KernelWrapper from pystencils.utils import atomic_file_write, recursive_dict_update +class CpuKernelWrapper(KernelWrapper): + def __init__(self, kfunc: KernelFunction, compiled_kernel: Callable[..., None]) -> None: + super().__init__(kfunc) + self._compiled_kernel = compiled_kernel + + def __call__(self, **kwargs) -> None: + self._compiled_kernel(**kwargs) + + @property + def kernel(self) -> Callable[..., None]: + return self._compiled_kernel + + +class LegacyCpuJit(JitBase): + """Wrapper around ``pystencils.cpu.cpujit``""" + + def compile(self, kernel: KernelFunction) -> KernelWrapper: + return compile_and_load(kernel) + + def make_python_function(kernel_function_node, custom_backend=None): """ Creates C code from the abstract syntax tree, compiles it and makes it accessible as Python function @@ -449,4 +471,4 @@ def compile_and_load(kernel: KernelFunction, custom_backend=None): ) result = load_kernel_from_file(code_hash_str, kernel.name, lib_file) - return KernelWrapper(result, kernel.parameters, kernel) + return CpuKernelWrapper(kernel, result) diff --git a/src/pystencils/backend/kernelcreation/analysis.py b/src/pystencils/backend/kernelcreation/analysis.py index 05aa7992819be66ed7816882f9eb96372e143d64..0776848649766862c48d0a52ed3511274871450c 100644 --- a/src/pystencils/backend/kernelcreation/analysis.py +++ b/src/pystencils/backend/kernelcreation/analysis.py @@ -9,8 +9,8 @@ import sympy as sp from .context import KernelCreationContext from ...field import Field -from ...assignment import Assignment from ...simp import AssignmentCollection +from sympy.codegen.ast import AssignmentBase from ..exceptions import PsInternalCompilerError, KernelConstraintsError @@ -43,13 +43,16 @@ class KernelAnalysis: FieldAndIndex = namedtuple("FieldAndIndex", ["field", "index"]) - def __init__(self, ctx: KernelCreationContext): + def __init__( + self, + ctx: KernelCreationContext, + check_access_independence: bool = True, + check_double_writes: bool = True, + ): self._ctx = ctx - self._check_access_independence = False - self._check_double_writes = ( - False # TODO: Access independence check implies double writes check - ) + self._check_access_independence = check_access_independence + self._check_double_writes = check_double_writes # Map pairs of fields and indices to offsets self._field_writes: dict[KernelAnalysis.FieldAndIndex, set[Any]] = defaultdict( @@ -63,7 +66,9 @@ class KernelAnalysis: self._called = False - def __call__(self, obj: AssignmentCollection | Sequence[Assignment] | Assignment): + def __call__( + self, obj: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase + ): if self._called: raise PsInternalCompilerError("KernelAnalysis called twice!") @@ -83,7 +88,7 @@ class KernelAnalysis: for asm in asms: self._visit(asm) - case Assignment(): + case AssignmentBase(): self._handle_rhs(obj.rhs) self._handle_lhs(obj.lhs) diff --git a/src/pystencils/backend/kernelcreation/ast_factory.py b/src/pystencils/backend/kernelcreation/ast_factory.py index 
d5695be93a7a89134f3bc7a12f623006768579d1..a0328a123893a9f103c0ef66aa98028cc5437708 100644 --- a/src/pystencils/backend/kernelcreation/ast_factory.py +++ b/src/pystencils/backend/kernelcreation/ast_factory.py @@ -97,10 +97,14 @@ class AstFactory: return PsExpression.make(PsConstant(idx, self._ctx.index_dtype)) def _parse_any_index(self, idx: Any) -> PsExpression: - return self.parse_index(cast(IndexParsable, idx)) + if not isinstance(idx, _IndexParsable): + raise TypeError(f"Cannot parse {idx} as an index expression") + return self.parse_index(idx) def parse_slice( - self, slic: slice, upper_limit: Any | None = None + self, + iter_slice: IndexParsable | slice, + normalize_to: IndexParsable | None = None, ) -> tuple[PsExpression, PsExpression, PsExpression]: """Parse a slice to obtain start, stop and step expressions for a loop or iteration space dimension. @@ -109,30 +113,63 @@ class AstFactory: They may also be sympy expressions or integer constants, in which case they are parsed to AST objects and must also typify with the kernel creation context's ``index_dtype``. - If the slice's ``stop`` member is `None` or a negative `int`, `upper_limit` must be specified, which is then - used as the upper iteration limit as either ``upper_limit`` or ``upper_limit - stop``. + The `step` member of the slice, if it is constant, must be positive. + + The slice may optionally be normalized with respect to an upper iteration limit. + If `normalize_to` is specified, negative integers in `iter_slice.start` and `iter_slice.stop` will + be added to that normalization limit. Args: - slic: The iteration slice - upper_limit: Optionally, the upper iteration limit + iter_slice: The iteration slice + normalize_to: The upper iteration limit with respect to which the slice should be normalized """ - if slic.stop is None or (isinstance(slic.stop, int) and slic.stop < 0): - if upper_limit is None: + from pystencils.backend.transformations import EliminateConstants + + fold = EliminateConstants(self._ctx) + + start: PsExpression + stop: PsExpression | None + step: PsExpression + + if not isinstance(iter_slice, slice): + start = self.parse_index(iter_slice) + stop = fold( + self._typify(self.parse_index(iter_slice) + self.parse_index(1)) + ) + step = self.parse_index(1) + else: + start = self._parse_any_index( + iter_slice.start if iter_slice.start is not None else 0 + ) + stop = ( + self._parse_any_index(iter_slice.stop) + if iter_slice.stop is not None + else None + ) + step = self._parse_any_index( + iter_slice.step if iter_slice.step is not None else 1 + ) + + if isinstance(step, PsConstantExpr) and step.constant.value <= 0: raise ValueError( - "Must specify an upper iteration limit if `slice.stop` is `None` or a negative `int`" + f"Invalid value for `slice.step`: {step.constant.value}" ) - start = self._parse_any_index(slic.start if slic.start is not None else 0) - stop = ( - self._parse_any_index(slic.stop) - if slic.stop is not None - else self._parse_any_index(upper_limit) - ) - step = self._parse_any_index(slic.step if slic.step is not None else 1) + if normalize_to is not None: + upper_limit = self.parse_index(normalize_to) + if isinstance(start, PsConstantExpr) and start.constant.value < 0: + start = fold(self._typify(upper_limit.clone() + start)) - if isinstance(slic.stop, int) and slic.stop < 0: - stop = self._parse_any_index(upper_limit) + stop + if stop is None: + stop = upper_limit + elif isinstance(stop, PsConstantExpr) and stop.constant.value < 0: + stop = fold(self._typify(upper_limit.clone() + stop)) + 
+ elif stop is None: + raise ValueError( + "Cannot parse a slice with `stop == None` if no normalization limit is given" + ) return start, stop, step diff --git a/src/pystencils/backend/kernelcreation/context.py b/src/pystencils/backend/kernelcreation/context.py index 916274314b1651aae29619ef2a6a59e15b1d3cc6..464c08dd9129f98df56db68906ff5eba6a225a3e 100644 --- a/src/pystencils/backend/kernelcreation/context.py +++ b/src/pystencils/backend/kernelcreation/context.py @@ -239,7 +239,7 @@ class KernelCreationContext: num_entries = field.index_shape[0] if field.index_shape else 1 if not isinstance(num_entries, int): raise KernelConstraintsError( - f"Invalid index shape of buffer field {field.name}: {field.spatial_dimensions}. " + f"Invalid index shape of buffer field {field.name}: {num_entries}. " "Buffer fields cannot have variable index shape." ) diff --git a/src/pystencils/backend/kernelcreation/freeze.py b/src/pystencils/backend/kernelcreation/freeze.py index f81ed586bc60b6d831ea4b03ff37612841fc7e78..25ce2811516ae361fc8efc074686f64f762e89ab 100644 --- a/src/pystencils/backend/kernelcreation/freeze.py +++ b/src/pystencils/backend/kernelcreation/freeze.py @@ -299,7 +299,7 @@ class FreezeExpressions: if not access.is_absolute_access: match field.field_type: - case FieldType.GENERIC: + case FieldType.GENERIC | FieldType.CUSTOM: # Add the iteration counters offsets = [ PsExpression.make(i) + o @@ -319,8 +319,6 @@ class FreezeExpressions: compressed_ctr = ispace.compressed_counter() assert len(offsets) == 1 offsets = [compressed_ctr + offsets[0]] - case FieldType.CUSTOM: - raise ValueError("Custom fields support only absolute accesses.") case unknown: raise NotImplementedError( f"Cannot translate accesses to field type {unknown} yet." diff --git a/src/pystencils/backend/kernelcreation/iteration_space.py b/src/pystencils/backend/kernelcreation/iteration_space.py index bfebd5af6925f560eae9bbddc7a75f41d2e5a876..8175fffed9782d9271262b7bc220e4dcdc208705 100644 --- a/src/pystencils/backend/kernelcreation/iteration_space.py +++ b/src/pystencils/backend/kernelcreation/iteration_space.py @@ -121,7 +121,7 @@ class FullIterationSpace(IterationSpace): @staticmethod def create_from_slice( ctx: KernelCreationContext, - iteration_slice: slice | Sequence[slice], + iteration_slice: int | slice | tuple[int | slice, ...], archetype_field: Field | None = None, ): """Create an iteration space from a sequence of slices, optionally over an archetype field. @@ -131,7 +131,7 @@ class FullIterationSpace(IterationSpace): iteration_slice: The iteration slices for each dimension; for valid formats, see `AstFactory.parse_slice` archetype_field: Optionally, an archetype field that dictates the upper slice limits and loop order. 
""" - if isinstance(iteration_slice, slice): + if not isinstance(iteration_slice, tuple): iteration_slice = (iteration_slice,) dim = len(iteration_slice) @@ -163,7 +163,9 @@ class FullIterationSpace(IterationSpace): factory = AstFactory(ctx) - def to_dim(slic: slice, size: PsSymbol | PsConstant | None, ctr: PsSymbol): + def to_dim( + slic: int | slice, size: PsSymbol | PsConstant | None, ctr: PsSymbol + ): start, stop, step = factory.parse_slice(slic, size) return FullIterationSpace.Dimension(start, stop, step, ctr) @@ -393,7 +395,7 @@ def create_full_iteration_space( ctx: KernelCreationContext, assignments: AssignmentCollection, ghost_layers: None | int | Sequence[int | tuple[int, int]] = None, - iteration_slice: None | Sequence[slice] = None, + iteration_slice: None | int | slice | tuple[int | slice, ...] = None, ) -> IterationSpace: assert not ctx.fields.index_fields @@ -443,7 +445,9 @@ def create_full_iteration_space( ) else: if len(domain_field_accesses) > 0: - inferred_gls = max([fa.required_ghost_layers for fa in domain_field_accesses]) + inferred_gls = max( + [fa.required_ghost_layers for fa in domain_field_accesses] + ) else: inferred_gls = 0 diff --git a/src/pystencils/backend/platforms/cuda.py b/src/pystencils/backend/platforms/cuda.py index 4233784825659362bb028f40d5f15d1c28402be1..c89d2278881bc48cd81df44ab49adefda4fc99f3 100644 --- a/src/pystencils/backend/platforms/cuda.py +++ b/src/pystencils/backend/platforms/cuda.py @@ -11,7 +11,14 @@ from ..kernelcreation import ( from ..kernelcreation.context import KernelCreationContext from ..ast.structural import PsBlock, PsConditional, PsDeclaration -from ..ast.expressions import PsExpression, PsLiteralExpr, PsCast, PsCall +from ..ast.expressions import ( + PsExpression, + PsLiteralExpr, + PsCast, + PsCall, + PsLookup, + PsArrayAccess, +) from ..ast.expressions import PsLt, PsAnd from ...types import PsSignedIntegerType, PsIeeeFloatType from ..literals import PsLiteral @@ -154,12 +161,36 @@ class CudaPlatform(GenericGpu): ) -> tuple[PsBlock, GpuThreadsRange]: ispace.sparse_counter.dtype = constify(ispace.sparse_counter.get_dtype()) - ctr = PsExpression.make(ispace.sparse_counter) + sparse_ctr = PsExpression.make(ispace.sparse_counter) thread_idx = self._linear_thread_idx(0) - idx_decl = self._typify(PsDeclaration(ctr, PsCast(ctr.get_dtype(), thread_idx))) - body.statements = [idx_decl] + body.statements + sparse_idx_decl = self._typify( + PsDeclaration(sparse_ctr, PsCast(sparse_ctr.get_dtype(), thread_idx)) + ) + + mappings = [ + PsDeclaration( + PsExpression.make(ctr), + PsLookup( + PsArrayAccess( + ispace.index_list.base_pointer, + sparse_ctr, + ), + coord.name, + ), + ) + for ctr, coord in zip(ispace.spatial_indices, ispace.coordinate_members) + ] + body.statements = mappings + body.statements + + if not self._cfg.omit_range_check: + stop = PsExpression.make(ispace.index_list.shape[0]) + condition = PsLt(sparse_ctr, stop) + ast = PsBlock([sparse_idx_decl, PsConditional(condition, body)]) + else: + body.statements = [sparse_idx_decl] + body.statements + ast = body - return body, GpuThreadsRange.from_ispace(ispace) + return ast, GpuThreadsRange.from_ispace(ispace) def _linear_thread_idx(self, coord: int): block_size = BLOCK_DIM[coord] diff --git a/src/pystencils/backend/platforms/sycl.py b/src/pystencils/backend/platforms/sycl.py index 52953115ab3a50139c6bfdf047cc8f9fa53714d8..39ec09992765f388c31ec1cb003a4d5e7849fbca 100644 --- a/src/pystencils/backend/platforms/sycl.py +++ b/src/pystencils/backend/platforms/sycl.py @@ -5,7 +5,19 @@ 
from ..kernelcreation.iteration_space import ( SparseIterationSpace, ) from ..ast.structural import PsDeclaration, PsBlock, PsConditional -from ..ast.expressions import PsExpression, PsSymbolExpr, PsSubscript, PsLt, PsAnd, PsCall, PsGe, PsLe, PsTernary +from ..ast.expressions import ( + PsExpression, + PsSymbolExpr, + PsSubscript, + PsLt, + PsAnd, + PsCall, + PsGe, + PsLe, + PsTernary, + PsLookup, + PsArrayAccess +) from ..extensions.cpp import CppMethodCall from ..kernelcreation.context import KernelCreationContext @@ -40,7 +52,7 @@ class SyclPlatform(GenericGpu): def select_function(self, call: PsCall) -> PsExpression: assert isinstance(call.function, PsMathFunction) - + func = call.function.func dtype = call.get_dtype() arg_types = (dtype,) * func.num_args @@ -70,13 +82,13 @@ class SyclPlatform(GenericGpu): call.function = cfunc return call - + if isinstance(dtype, PsIntegerType): match func: case MathFunctions.Abs: zero = PsExpression.make(PsConstant(0, dtype)) arg = call.args[0] - return PsTernary(PsGe(arg, zero), arg, - arg) + return PsTernary(PsGe(arg, zero), arg, -arg) case MathFunctions.Min: arg1, arg2 = call.args return PsTernary(PsLe(arg1, arg2), arg1, arg2) @@ -144,11 +156,33 @@ class SyclPlatform(GenericGpu): ispace.sparse_counter.dtype = constify(ispace.sparse_counter.get_dtype()) subscript.dtype = ispace.sparse_counter.get_dtype() - ctr = PsExpression.make(ispace.sparse_counter) - unpacking = PsDeclaration(ctr, subscript) - body.statements = [unpacking] + body.statements + sparse_ctr = PsExpression.make(ispace.sparse_counter) + sparse_idx_decl = PsDeclaration(sparse_ctr, subscript) + + mappings = [ + PsDeclaration( + PsExpression.make(ctr), + PsLookup( + PsArrayAccess( + ispace.index_list.base_pointer, + sparse_ctr, + ), + coord.name, + ), + ) + for ctr, coord in zip(ispace.spatial_indices, ispace.coordinate_members) + ] + body.statements = mappings + body.statements + + if not self._cfg.omit_range_check: + stop = PsExpression.make(ispace.index_list.shape[0]) + condition = PsLt(sparse_ctr, stop) + ast = PsBlock([sparse_idx_decl, PsConditional(condition, body)]) + else: + body.statements = [sparse_idx_decl] + body.statements + ast = body - return body, GpuThreadsRange.from_ispace(ispace) + return ast, GpuThreadsRange.from_ispace(ispace) def _item_type(self, rank: int): if not self._cfg.sycl_automatic_block_size: diff --git a/src/pystencils/config.py b/src/pystencils/config.py index 8e9909b31d2e8d13306794f9914f6a6a22c5170c..c8cf489f7a29857232ec9211479c26796850f4dc 100644 --- a/src/pystencils/config.py +++ b/src/pystencils/config.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, InitVar from .enums import Target from .field import Field, FieldType -from .types import PsIntegerType, UserTypeSpec, PsIeeeFloatType +from .types import PsIntegerType, UserTypeSpec, PsIeeeFloatType, create_type from .defaults import DEFAULTS @@ -147,6 +147,9 @@ class GpuIndexingConfig: This check can be discarded through this option, at your own peril. """ + block_size: tuple[int, int, int] | None = None + """Desired block size for the execution of GPU kernels. May be overridden later by the runtime system.""" + sycl_automatic_block_size: bool = True """If set to `True` while generating for `Target.SYCL`, let the SYCL runtime decide on the block size. 
@@ -208,7 +211,7 @@ class CreateKernelConfig:
     """Data Types"""
-    index_dtype: PsIntegerType = DEFAULTS.index_dtype
+    index_dtype: UserTypeSpec = DEFAULTS.index_dtype
     """Data type used for all index calculations."""
     default_dtype: UserTypeSpec = PsIeeeFloatType(64)
@@ -217,6 +220,24 @@ class CreateKernelConfig:
     This data type will be applied to all untyped symbols.
     """
+    """Analysis"""
+
+    allow_double_writes: bool = False
+    """
+    If True, don't check if every field is only written at a single location. This is required
+    for example for kernels that are compiled with loop step sizes > 1 and therefore handle multiple
+    cells at once. Use with care!
+    """
+
+    skip_independence_check: bool = False
+    """
+    By default, the assignment list is checked for read/write independence. This means fields are only written at
+    locations where they are also read. Doing so guarantees thread safety. In some cases, e.g. for
+    periodicity kernels, this cannot be assured, so the check needs to be deactivated. Use with care!
+    """
+
+    """Target-Specific Options"""
+
     cpu_optim: None | CpuOptimConfig = None
     """Configuration of the CPU kernel optimizer.
@@ -240,6 +261,46 @@ class CreateKernelConfig:
     cpu_vectorize_info: InitVar[dict | None] = None
     """Deprecated; use `cpu_optim.vectorize` instead."""
+    gpu_indexing_params: InitVar[dict | None] = None
+    """Deprecated; use `gpu_indexing` instead."""
+
+    # Getters
+
+    def get_jit(self) -> JitBase:
+        """Returns either the user-specified JIT compiler, or infers one from the target if none is given."""
+        if self.jit is None:
+            if self.target.is_cpu():
+                from .backend.jit import LegacyCpuJit
+
+                return LegacyCpuJit()
+            elif self.target == Target.CUDA:
+                try:
+                    from .backend.jit.gpu_cupy import CupyJit
+
+                    if (
+                        self.gpu_indexing is not None
+                        and self.gpu_indexing.block_size is not None
+                    ):
+                        return CupyJit(self.gpu_indexing.block_size)
+                    else:
+                        return CupyJit()
+
+                except ImportError:
+                    from .backend.jit import no_jit
+
+                    return no_jit
+
+            elif self.target == Target.SYCL:
+                from .backend.jit import no_jit
+
+                return no_jit
+            else:
+                raise NotImplementedError(
+                    f"No default JIT compiler implemented yet for target {self.target}"
+                )
+        else:
+            return self.jit
+
     # Postprocessing
     def __post_init__(self, *args):
@@ -247,6 +308,10 @@ class CreateKernelConfig:
         # Check deprecated options
         self._check_deprecations(*args)
+        # Check index data type
+        if not isinstance(create_type(self.index_dtype), PsIntegerType):
+            raise PsOptionsError("`index_dtype` was not an integer type.")
+
         # Check iteration space argument consistency
         if (
             int(self.iteration_slice is not None)
@@ -270,11 +335,6 @@ class CreateKernelConfig:
         # Check optim
         if self.cpu_optim is not None:
-            if not self.target.is_cpu():
-                raise PsOptionsError(
-                    f"`cpu_optim` cannot be set for non-CPU target {self.target}"
-                )
-
             if (
                 self.cpu_optim.vectorize is not False
                 and not self.target.is_vector_cpu()
@@ -284,41 +344,25 @@ class CreateKernelConfig:
             )
         if self.gpu_indexing is not None:
-            if self.target != Target.SYCL:
-                raise PsOptionsError(
-                    f"`gpu_indexing` cannot be set for non-SYCL target {self.target}"
-                )
-
-        # Infer JIT
-        if self.jit is None:
-            if self.target.is_cpu():
-                from .backend.jit import LegacyCpuJit
-
-                self.jit = LegacyCpuJit()
-            elif self.target == Target.CUDA:
-                try:
-                    from .backend.jit.gpu_cupy import CupyJit
-
-                    self.jit = CupyJit()
-                except ImportError:
-                    from .backend.jit import no_jit
-
-                    self.jit = no_jit
-
-            elif self.target == Target.SYCL:
-                from .backend.jit import no_jit
-
-                self.jit = no_jit
-            else:
-                raise
NotImplementedError( - f"No default JIT compiler implemented yet for target {self.target}" - ) + if isinstance(self.gpu_indexing, str): + match self.gpu_indexing: + case "block": + self.gpu_indexing = GpuIndexingConfig() + case "line": + raise NotImplementedError( + "GPU line indexing is currently unavailable." + ) + case other: + raise PsOptionsError( + f"Invalid value for option gpu_indexing: {other}" + ) def _check_deprecations( self, data_type: UserTypeSpec | None, cpu_openmp: bool | int | None, cpu_vectorize_info: dict | None, + gpu_indexing_params: dict | None, ): optim: CpuOptimConfig | None = None @@ -360,6 +404,18 @@ class CreateKernelConfig: else: self.cpu_optim = optim + if gpu_indexing_params is not None: + _deprecated_option("gpu_indexing_params", "gpu_indexing") + + if self.gpu_indexing is not None: + raise PsOptionsError( + "Cannot specify both `gpu_indexing` and the deprecated `gpu_indexing_params` at the same time." + ) + + self.gpu_indexing = GpuIndexingConfig( + block_size=gpu_indexing_params.get("block_size", None) + ) + def _deprecated_option(name, instead): from warnings import warn diff --git a/src/pystencils/gpu/periodicity.py b/src/pystencils/gpu/periodicity.py new file mode 100644 index 0000000000000000000000000000000000000000..6569fbb0f14ab6b44add1c93cf8b2210699deef4 --- /dev/null +++ b/src/pystencils/gpu/periodicity.py @@ -0,0 +1,75 @@ +import numpy as np +from itertools import product + +from pystencils import CreateKernelConfig, create_kernel +from pystencils import Assignment, Field +from pystencils.enums import Target +from pystencils.slicing import get_periodic_boundary_src_dst_slices, normalize_slice + + +def create_copy_kernel( + domain_size, + src_slice, + dst_slice, + index_dimensions=0, + index_dim_shape=1, + dtype=np.float64, +): + """Copies a rectangular part of an array to another non-overlapping part""" + + field = Field.create_generic( + "field", len(domain_size), index_dimensions=index_dimensions, dtype=dtype + ) + normalized_src_slice = normalize_slice(src_slice, field.spatial_shape) + normalized_dst_slice = normalize_slice(dst_slice, field.spatial_shape) + + offset = [ + s1.start - s2.start + for s1, s2 in zip(normalized_src_slice, normalized_dst_slice) + ] + assert offset == [ + s1.stop - s2.stop for s1, s2 in zip(normalized_src_slice, normalized_dst_slice) + ], "Slices have to have same size" + + update_eqs = [] + if index_dimensions < 2: + index_dim_shape = [index_dim_shape] + for i in product(*[range(d) for d in index_dim_shape]): + eq = Assignment(field(*i), field[tuple(offset)](*i)) + update_eqs.append(eq) + + config = CreateKernelConfig( + target=Target.GPU, iteration_slice=dst_slice, skip_independence_check=True + ) + + ast = create_kernel(update_eqs, config=config) + return ast + + +def get_periodic_boundary_functor( + stencil, + domain_size, + index_dimensions=0, + index_dim_shape=1, + ghost_layers=1, + thickness=None, + dtype=np.float64, + target=Target.GPU, +): + assert target in {Target.GPU} + src_dst_slice_tuples = get_periodic_boundary_src_dst_slices( + stencil, ghost_layers, thickness + ) + kernels = [] + + for src_slice, dst_slice in src_dst_slice_tuples: + ast = create_copy_kernel( + domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype + ) + kernels.append(ast.compile()) + + def functor(field, **_): + for kernel in kernels: + kernel(field=field) + + return functor diff --git a/src/pystencils/kernel_wrapper.py b/src/pystencils/kernel_wrapper.py index 
fba94abd5b67ddfa5d196b24cabd1c6633c9253a..afce06d77a17e0eb067d84a02bc273ba0668fc55 100644 --- a/src/pystencils/kernel_wrapper.py +++ b/src/pystencils/kernel_wrapper.py @@ -1,26 +1,3 @@ -import pystencils +from .backend.jit import KernelWrapper as _KernelWrapper - -class KernelWrapper: - """ - Light-weight wrapper around a compiled kernel. - - Can be called while still providing access to underlying AST. - """ - - def __init__(self, kernel, parameters, ast_node): - self.kernel = kernel - self.parameters = parameters - self.ast = ast_node - self.num_regs = None - - def __call__(self, **kwargs): - return self.kernel(**kwargs) - - @property - def target(self) -> pystencils.Target: - return self.ast.target - - @property - def code(self): - return pystencils.get_code_str(self.ast) +KernelWrapper = _KernelWrapper diff --git a/src/pystencils/kernelcreation.py b/src/pystencils/kernelcreation.py index 54a2c3585a4948193edf01a5d7ae86e4fb523a71..ae64bdea3a95db84641e91ff455825b21f772e0b 100644 --- a/src/pystencils/kernelcreation.py +++ b/src/pystencils/kernelcreation.py @@ -1,9 +1,10 @@ -from typing import cast +from typing import cast, Sequence +from dataclasses import replace from .enums import Target from .config import CreateKernelConfig from .backend import KernelFunction -from .types import create_numeric_type +from .types import create_numeric_type, PsIntegerType from .backend.ast.structural import PsBlock from .backend.kernelcreation import ( KernelCreationContext, @@ -28,39 +29,55 @@ from .backend.kernelfunction import ( ) from .simp import AssignmentCollection -from .assignment import Assignment +from sympy.codegen.ast import AssignmentBase __all__ = ["create_kernel"] def create_kernel( - assignments: AssignmentCollection | list[Assignment] | Assignment, - config: CreateKernelConfig = CreateKernelConfig(), + assignments: AssignmentCollection | Sequence[AssignmentBase] | AssignmentBase, + config: CreateKernelConfig | None = None, + **kwargs, ) -> KernelFunction: """Create a kernel function from a set of assignments. Args: assignments: The kernel's sequence of assignments, expressed using SymPy config: The configuration for the kernel translator + kwargs: If ``config`` is not set, it is created from the keyword arguments; + if it is set, its option will be overridden by any keyword arguments. 
Returns: The numerical kernel in pystencil's internal representation, ready to be exported or compiled """ + if not config: + config = CreateKernelConfig() + + if kwargs: + config = replace(config, **kwargs) + + idx_dtype = create_numeric_type(config.index_dtype) + assert isinstance(idx_dtype, PsIntegerType) + ctx = KernelCreationContext( default_dtype=create_numeric_type(config.default_dtype), - index_dtype=config.index_dtype, + index_dtype=idx_dtype, ) - if isinstance(assignments, Assignment): + if isinstance(assignments, AssignmentBase): assignments = [assignments] if not isinstance(assignments, AssignmentCollection): - assignments = AssignmentCollection(assignments) + assignments = AssignmentCollection(assignments) # type: ignore + + _ = _parse_simplification_hints(assignments) - analysis = KernelAnalysis(ctx) + analysis = KernelAnalysis( + ctx, not config.skip_independence_check, not config.allow_double_writes + ) analysis(assignments) if len(ctx.fields.index_fields) > 0 or config.index_field is not None: @@ -132,11 +149,14 @@ def create_kernel( select_functions = SelectFunctions(platform) kernel_ast = cast(PsBlock, select_functions(kernel_ast)) - assert config.jit is not None - if config.target.is_cpu(): return create_cpu_kernel_function( - ctx, platform, kernel_ast, config.function_name, config.target, config.jit + ctx, + platform, + kernel_ast, + config.function_name, + config.target, + config.get_jit(), ) else: return create_gpu_kernel_function( @@ -146,9 +166,20 @@ def create_kernel( gpu_threads, config.function_name, config.target, - config.jit, + config.get_jit(), ) -def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs): - raise NotImplementedError("Staggered kernels are not yet implemented for pystencils 2.0") +def create_staggered_kernel( + assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs +): + raise NotImplementedError( + "Staggered kernels are not yet implemented for pystencils 2.0" + ) + + +# Internals + +def _parse_simplification_hints(ac: AssignmentCollection): + if "split_groups" in ac.simplification_hints: + raise NotImplementedError("Loop splitting was requested, but is not implemented yet") diff --git a/src/pystencils/old/gpu/periodicity.py b/src/pystencils/old/gpu/periodicity.py deleted file mode 100644 index 21aa95e5672718f79221a38259ab91a95a20366d..0000000000000000000000000000000000000000 --- a/src/pystencils/old/gpu/periodicity.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np -from itertools import product - -from pystencils import CreateKernelConfig, create_kernel -from pystencils.gpu import make_python_function -from pystencils import Assignment, Field -from pystencils.enums import Target -from pystencils.slicing import get_periodic_boundary_src_dst_slices, normalize_slice - - -def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64): - """Copies a rectangular part of an array to another non-overlapping part""" - - f = Field.create_generic("pdfs", len(domain_size), index_dimensions=index_dimensions, dtype=dtype) - normalized_from_slice = normalize_slice(from_slice, f.spatial_shape) - normalized_to_slice = normalize_slice(to_slice, f.spatial_shape) - - offset = [s1.start - s2.start for s1, s2 in zip(normalized_from_slice, normalized_to_slice)] - assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \ - "Slices have to have same size" - - update_eqs = [] - if 
index_dimensions < 2: - index_dim_shape = [index_dim_shape] - for i in product(*[range(d) for d in index_dim_shape]): - eq = Assignment(f(*i), f[tuple(offset)](*i)) - update_eqs.append(eq) - - config = CreateKernelConfig(target=Target.GPU, iteration_slice=to_slice, skip_independence_check=True) - - ast = create_kernel(update_eqs, config=config) - return ast - - -def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1, - thickness=None, dtype=np.float64, target=Target.GPU): - assert target in {Target.GPU} - src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness) - kernels = [] - - for src_slice, dst_slice in src_dst_slice_tuples: - ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype) - kernels.append(make_python_function(ast)) - - def functor(pdfs, **_): - for kernel in kernels: - kernel(pdfs=pdfs) - - return functor diff --git a/src/pystencils/types/parsing.py b/src/pystencils/types/parsing.py index d6522e5bbf6c37c5f105114b40ad73b82fe3e1fd..5771eaca84413708c68c4f7941e07cbd63403e9e 100644 --- a/src/pystencils/types/parsing.py +++ b/src/pystencils/types/parsing.py @@ -95,6 +95,9 @@ def interpret_python_type(t: type) -> PsType: def interpret_numpy_dtype(t: np.dtype) -> PsType: if t.fields is not None: # it's a struct + if not t.isalignedstruct: + raise ValueError("pystencils currently only accepts aligned structured data types.") + members = [] for fname, fspec in t.fields.items(): members.append(PsStructType.Member(fname, interpret_numpy_dtype(fspec[0]))) diff --git a/tests/test_gpu.py b/tests/nbackend/kernelcreation/test_gpu.py similarity index 94% rename from tests/test_gpu.py rename to tests/nbackend/kernelcreation/test_gpu.py index 84ef340458d76d971e468e534499032f2fd56cc5..10d2224748b4c90d9c705bf141acba9db6719e9d 100644 --- a/tests/test_gpu.py +++ b/tests/nbackend/kernelcreation/test_gpu.py @@ -6,8 +6,8 @@ import sympy as sp from scipy.ndimage import convolve from pystencils import Assignment, Field, fields, CreateKernelConfig, create_kernel, Target -from pystencils.gpu import BlockIndexing -from pystencils.sympyextensions import sympy_cse_on_assignment_list +# from pystencils.gpu import BlockIndexing +from pystencils.simp import sympy_cse_on_assignment_list from pystencils.slicing import add_ghost_layers, make_slice, remove_ghost_layers, normalize_slice try: @@ -74,7 +74,7 @@ def test_multiple_index_dimensions(): """Sums along the last axis of a numpy array""" src_size = (7, 6, 4) dst_size = src_size[:2] - src_arr = np.asfortranarray(np.random.rand(*src_size)) + src_arr = np.array(np.random.rand(*src_size)) dst_arr = np.zeros(dst_size) src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=1) @@ -102,6 +102,7 @@ def test_multiple_index_dimensions(): np.testing.assert_almost_equal(reference, dst_arr) +@pytest.mark.xfail(reason="Line indexing not available yet") def test_ghost_layer(): size = (6, 5) src_arr = np.ones(size) @@ -126,6 +127,7 @@ def test_ghost_layer(): np.testing.assert_equal(reference, dst_arr) +@pytest.mark.xfail(reason="Line indexing not available yet") def test_setting_value(): arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5) arr_gpu = cp.asarray(arr_cpu) @@ -156,12 +158,13 @@ def test_periodicity(): cpu_result = np.copy(arr_cpu) periodic_cpu_kernel(cpu_result) - periodic_gpu_kernel(pdfs=arr_gpu) + periodic_gpu_kernel(arr_gpu) gpu_result = arr_gpu.get() np.testing.assert_equal(cpu_result, gpu_result) 
@pytest.mark.parametrize("device_number", device_numbers) +@pytest.mark.xfail(reason="Block indexing specification is not available yet") def test_block_indexing(device_number): f = fields("f: [3D]") s = normalize_slice(make_slice[:, :, :], f.spatial_shape) @@ -194,6 +197,7 @@ def test_block_indexing(device_number): @pytest.mark.parametrize('gpu_indexing', ("block", "line")) @pytest.mark.parametrize('layout', ("C", "F")) @pytest.mark.parametrize('shape', ((5, 5, 5, 5), (3, 17, 387, 4), (23, 44, 21, 11))) +@pytest.mark.xfail(reason="4D kernels not available yet") def test_four_dimensional_kernel(gpu_indexing, layout, shape): n_elements = np.prod(shape) diff --git a/tests/nbackend/kernelcreation/test_index_kernels.py b/tests/nbackend/kernelcreation/test_index_kernels.py index 817b4a244898f3061d37a0b69483b5a79253aad2..5093c43ff4f74343b0fcf3f45d34b1cfb6597d05 100644 --- a/tests/nbackend/kernelcreation/test_index_kernels.py +++ b/tests/nbackend/kernelcreation/test_index_kernels.py @@ -1,16 +1,32 @@ import numpy as np +import pytest -from pystencils import Assignment, Field, FieldType, AssignmentCollection +from pystencils import Assignment, Field, FieldType, AssignmentCollection, Target from pystencils.kernelcreation import create_kernel, CreateKernelConfig -def test_indexed_kernel(): - arr = np.zeros((3, 4)) - dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)]) - index_arr = np.zeros((3,), dtype=dtype) - index_arr[0] = (0, 2, 3.0) - index_arr[1] = (1, 3, 42.0) - index_arr[2] = (2, 1, 5.0) +@pytest.mark.parametrize("target", [Target.CPU, Target.GPU]) +def test_indexed_kernel(target): + if target == Target.GPU: + cp = pytest.importorskip("cupy") + xp = cp + else: + xp = np + + arr = xp.zeros((3, 4)) + dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)], align=True) + + cpu_index_arr = np.zeros((3,), dtype=dtype) + cpu_index_arr[0] = (0, 2, 3.0) + cpu_index_arr[1] = (1, 3, 42.0) + cpu_index_arr[2] = (2, 1, 5.0) + + if target == Target.GPU: + gpu_index_arr = cp.empty(cpu_index_arr.shape, cpu_index_arr.dtype) + gpu_index_arr.set(cpu_index_arr) + index_arr = gpu_index_arr + else: + index_arr = cpu_index_arr index_field = Field.create_from_numpy_array('index', index_arr, field_type=FieldType.INDEXED) normal_field = Field.create_from_numpy_array('f', arr) @@ -18,11 +34,14 @@ def test_indexed_kernel(): Assignment(normal_field[0, 0], index_field('value')) ]) - options = CreateKernelConfig(index_field=index_field) + options = CreateKernelConfig(index_field=index_field, target=target) ast = create_kernel(update_rule, options) kernel = ast.compile() kernel(f=arr, index=index_arr) - for i in range(index_arr.shape[0]): - np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13) + if target == Target.GPU: + arr = cp.asnumpy(arr) + + for i in range(cpu_index_arr.shape[0]): + np.testing.assert_allclose(arr[cpu_index_arr[i]['x'], cpu_index_arr[i]['y']], cpu_index_arr[i]['value'], atol=1e-13) diff --git a/tests/nbackend/kernelcreation/test_iteration_space.py b/tests/nbackend/kernelcreation/test_iteration_space.py index f9646afc26d11bddfb49c5a178096f9d2157d5f6..8ff678fbad398af18b71f2f30145ff34ab269685 100644 --- a/tests/nbackend/kernelcreation/test_iteration_space.py +++ b/tests/nbackend/kernelcreation/test_iteration_space.py @@ -1,29 +1,33 @@ import pytest +import numpy as np from pystencils import make_slice, Field, create_type from pystencils.sympyextensions.typed_sympy import TypedSymbol from pystencils.backend.constants import PsConstant 
-from pystencils.backend.kernelcreation import KernelCreationContext, FullIterationSpace +from pystencils.backend.kernelcreation import ( + KernelCreationContext, + FullIterationSpace, + AstFactory, +) from pystencils.backend.ast.expressions import PsAdd, PsConstantExpr, PsExpression from pystencils.backend.kernelcreation.typification import TypificationError -from pystencils.types.quick import Int -def test_slices(): +def test_slices_over_field(): ctx = KernelCreationContext() archetype_field = Field.create_generic("f", spatial_dimensions=3, layout="fzyx") ctx.add_field(archetype_field) - islice = (slice(1, -1, 1), slice(3, -3, 3), slice(0, None, -1)) + islice = (slice(1, -1, 1), slice(3, -3, 3), slice(0, None, 1)) ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field) archetype_arr = ctx.get_array(archetype_field) dims = ispace.dimensions - for sl, size, dim in zip(islice, archetype_arr.shape, dims): + for sl, dim in zip(islice, dims): assert ( isinstance(dim.start, PsConstantExpr) and dim.start.constant.value == sl.start @@ -45,6 +49,105 @@ def test_slices(): assert dims[2].stop.structurally_equal(PsExpression.make(archetype_arr.shape[2])) +def test_slices_with_fixed_size_field(): + ctx = KernelCreationContext() + + archetype_field = Field.create_fixed_size("f", (4, 5, 6), layout="fzyx") + ctx.add_field(archetype_field) + + islice = (slice(1, -1, 1), slice(3, -3, 3), slice(0, None, 1)) + ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + + archetype_arr = ctx.get_array(archetype_field) + + dims = ispace.dimensions + + for sl, size, dim in zip(islice, archetype_arr.shape, dims): + assert ( + isinstance(dim.start, PsConstantExpr) + and dim.start.constant.value == sl.start + ) + + assert isinstance(size, PsConstant) + + assert isinstance( + dim.stop, PsConstantExpr + ) and dim.stop.constant.value == np.int64( + size.value + sl.stop if sl.stop is not None else size.value + ) + + assert ( + isinstance(dim.step, PsConstantExpr) and dim.step.constant.value == sl.step + ) + + +def test_singular_slice_over_field(): + ctx = KernelCreationContext() + factory = AstFactory(ctx) + + archetype_field = Field.create_generic("f", spatial_dimensions=2, layout="fzyx") + ctx.add_field(archetype_field) + archetype_arr = ctx.get_array(archetype_field) + + islice = (4, -3) + ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + + dims = ispace.dimensions + + assert dims[0].start.structurally_equal(factory.parse_index(4)) + + assert dims[0].stop.structurally_equal(factory.parse_index(5)) + + assert dims[1].start.structurally_equal( + PsExpression.make(archetype_arr.shape[1]) + factory.parse_index(-3) + ) + + assert dims[1].stop.structurally_equal( + PsExpression.make(archetype_arr.shape[1]) + factory.parse_index(-2) + ) + + +def test_slices_with_negative_start(): + ctx = KernelCreationContext() + factory = AstFactory(ctx) + + archetype_field = Field.create_generic("f", spatial_dimensions=2, layout="fzyx") + ctx.add_field(archetype_field) + archetype_arr = ctx.get_array(archetype_field) + + islice = (slice(-3, -1, 1), slice(-4, None, 1)) + ispace = FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + + dims = ispace.dimensions + + assert dims[0].start.structurally_equal( + PsExpression.make(archetype_arr.shape[0]) + factory.parse_index(-3) + ) + + assert dims[1].start.structurally_equal( + PsExpression.make(archetype_arr.shape[1]) + factory.parse_index(-4) + ) + + +def test_field_independent_slices(): + ctx = 
KernelCreationContext() + + islice = (slice(-3, -1, 1), slice(-4, 7, 2)) + ispace = FullIterationSpace.create_from_slice(ctx, islice) + + dims = ispace.dimensions + + for sl, dim in zip(islice, dims): + assert isinstance(dim.start, PsConstantExpr) + assert dim.start.constant.value == np.int64(sl.start) + + assert isinstance(dim.stop, PsConstantExpr) + assert dim.stop.constant.value == np.int64(sl.stop) + + assert isinstance(dim.step, PsConstantExpr) + assert dim.step.constant.value == np.int64(sl.step) + + def test_invalid_slices(): ctx = KernelCreationContext() @@ -59,6 +162,14 @@ def test_invalid_slices(): with pytest.raises(TypificationError): FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + islice = (slice(1, 3, 0),) + with pytest.raises(ValueError): + FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + + islice = (slice(1, 3, -1),) + with pytest.raises(ValueError): + FullIterationSpace.create_from_slice(ctx, islice, archetype_field) + def test_iteration_count(): ctx = KernelCreationContext() @@ -69,16 +180,14 @@ def test_iteration_count(): three = PsExpression.make(PsConstant(3, ctx.index_dtype)) ispace = FullIterationSpace.create_from_slice( - ctx, make_slice[three : i-two, 1:8:3] + ctx, make_slice[three : i - two, 1:8:3] ) iters = [ispace.actual_iterations(coord) for coord in range(2)] assert iters[0].structurally_equal((i - two) - three) assert iters[1].structurally_equal(three) - empty_ispace = FullIterationSpace.create_from_slice( - ctx, make_slice[4:4:1, 4:4:7] - ) + empty_ispace = FullIterationSpace.create_from_slice(ctx, make_slice[4:4:1, 4:4:7]) iters = [empty_ispace.actual_iterations(coord) for coord in range(2)] assert iters[0].structurally_equal(zero) diff --git a/tests/nbackend/kernelcreation/test_typification.py b/tests/nbackend/kernelcreation/test_typification.py index d3da7e8881d631266d58ee6cc0d4d3612a2900a1..2ebfa2ec8ec8cf542aa55bbccd770ef0a7e9f5d4 100644 --- a/tests/nbackend/kernelcreation/test_typification.py +++ b/tests/nbackend/kernelcreation/test_typification.py @@ -141,7 +141,7 @@ def test_lhs_constness(): with pytest.raises(TypificationError): _ = typify(freeze(Assignment(f_const.absolute_access([0], [0]), x + y))) - np_struct = np.dtype([("size", np.uint32), ("data", np.float32)]) + np_struct = np.dtype([("size", np.uint32), ("data", np.float32)], align=True) struct_type = constify(create_type(np_struct)) struct_field = Field.create_generic( "struct_field", 1, dtype=struct_type, field_type=FieldType.CUSTOM @@ -167,7 +167,7 @@ def test_typify_structs(): freeze = FreezeExpressions(ctx) typify = Typifier(ctx) - np_struct = np.dtype([("size", np.uint32), ("data", np.float32)]) + np_struct = np.dtype([("size", np.uint32), ("data", np.float32)], align=True) f = Field.create_generic("f", 1, dtype=np_struct, field_type=FieldType.CUSTOM) x = sp.Symbol("x")