diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 9efca1f53a6870d2b0ce2bdd1da547d16beb6a6c..3f016810a9a673035adf1130039bc95e5d6ab70d 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -10,11 +10,12 @@ from sympy.printing.ccode import C89CodePrinter from pystencils.astnodes import KernelFunction, Node from pystencils.cpu.vectorization import vec_all, vec_any from pystencils.data_types import ( - PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func, - vector_memory_access) + PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, + reinterpret_cast_func, vector_memory_access) from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.integer_functions import ( - bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil) + bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, + int_div, int_power_of_2, modulo_ceil) try: from sympy.printing.ccode import C99CodePrinter as CCodePrinter @@ -91,7 +92,7 @@ def get_global_declarations(ast): visit_node(ast) - return sorted(set(global_declarations), key=lambda x: str(x)) + return sorted(set(global_declarations), key=str) def get_headers(ast_node: Node) -> Set[str]: diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py index 8bc58468901a006bc7ef8f278e3e7a544234de52..d590d65b4082e72745658f0b06eb152d64872944 100644 --- a/pystencils/backends/cuda_backend.py +++ b/pystencils/backends/cuda_backend.py @@ -3,7 +3,7 @@ from os.path import dirname, join from pystencils.astnodes import Node from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt -from pystencils.interpolation_astnodes import InterpolationMode +from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f: lines = f.readlines() @@ -70,10 +70,13 @@ class CudaSympyPrinter(CustomSympyPrinter): super(CudaSympyPrinter, self).__init__() self.known_functions.update(CUDA_KNOWN_FUNCTIONS) - def _print_TextureAccess(self, node): - dtype = node.texture.field.dtype.numpy_dtype + def _print_InterpolatorAccess(self, node): + dtype = node.interpolator.field.dtype.numpy_dtype - if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + if type(node) == DiffInterpolatorAccess: + # cubicTex3D_1st_derivative_x(texture tex, float3 coord) + template = f"cubicTex%iD_1st_derivative_{list(reversed('xyz'[:node.ndim]))[node.diff_coordinate_idx]}(%s, %s)" # noqa + elif node.interpolator.interpolation_mode == InterpolationMode.CUBIC_SPLINE: template = "cubicTex%iDSimple(%s, %s)" else: if dtype.itemsize > 4: @@ -84,8 +87,8 @@ class CudaSympyPrinter(CustomSympyPrinter): template = "tex%iD(%s, %s)" code = template % ( - node.texture.field.spatial_dimensions, - str(node.texture), + node.interpolator.field.spatial_dimensions, + str(node.interpolator), # + 0.5 comes from Nvidia's staggered indexing ', '.join(self._print(o + 0.5) for o in reversed(node.offsets)) ) diff --git a/pystencils/fd/derivative.py b/pystencils/fd/derivative.py index 7acd245059615ded27e2c1fe023e344b48c34a6e..02d8f74ee912ecbc59b80db0e757e27a26d4686e 100644 --- a/pystencils/fd/derivative.py +++ b/pystencils/fd/derivative.py @@ -111,6 +111,16 @@ class Diff(sp.Expr): def __str__(self): return "D(%s)" % self.arg + def interpolated_access(self, offset, **kwargs): + """Represents an interpolated access on a spatially differentiated field + + Args: + offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative + """ + from pystencils.interpolation_astnodes import DiffInterpolatorAccess + assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses" + return DiffInterpolatorAccess(self.arg.field.interpolated_access(offset, **kwargs).symbol, self.target, *offset) + class DiffOperator(sp.Expr): """Un-applied differential, i.e. differential operator diff --git a/pystencils/fd/finitedifferences.py b/pystencils/fd/finitedifferences.py index d5bce66e96d8aa5d296df68906c8871d40c08bba..5b6b15f95bebec40939d54a8ff2ad6c58c169f58 100644 --- a/pystencils/fd/finitedifferences.py +++ b/pystencils/fd/finitedifferences.py @@ -109,7 +109,9 @@ class Discretization2ndOrder: return self._discretize_advection(e) elif isinstance(e, Diff): arg, *indices = diff_args(e) - if not isinstance(arg, Field.Access): + from pystencils.interpolation_astnodes import InterpolatorAccess + + if not isinstance(arg, (Field.Access, InterpolatorAccess)): raise ValueError("Only derivatives with field or field accesses as arguments can be discretized") return self.spatial_stencil(indices, self.dx, arg) else: diff --git a/pystencils/field.py b/pystencils/field.py index 0bf80b211348b6e5c1975a86f4d51fc50cb9fdd4..6b85dead11778bf5aa6a820d4f34e8b547a95df4 100644 --- a/pystencils/field.py +++ b/pystencils/field.py @@ -332,7 +332,7 @@ class Field(AbstractField): self.latex_name: Optional[str] = None self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple( 0 for _ in range(self.spatial_dimensions) - )) # type + )) self.coordinate_transform = sp.eye(self.spatial_dimensions) if field_type == FieldType.STAGGERED: assert self.staggered_stencil diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py index 28ec47d0ba22e92cd4064b3400a0d055a87be7cb..86f251c97b95f04c52aaa803398ddb78ba42a2c1 100644 --- a/pystencils/gpucuda/cudajit.py +++ b/pystencils/gpucuda/cudajit.py @@ -5,13 +5,20 @@ from pystencils.data_types import StructType from pystencils.field import FieldType from pystencils.gpucuda.texture_utils import ndarray_to_tex from pystencils.include import get_pycuda_include_path, get_pystencils_include_path -from pystencils.interpolation_astnodes import TextureAccess +from pystencils.interpolation_astnodes import InterpolatorAccess, TextureCachedField from pystencils.kernel_wrapper import KernelWrapper from pystencils.kernelparameters import FieldPointerSymbol USE_FAST_MATH = True +def get_cubic_interpolation_include_paths(): + from os.path import join, dirname + + return [join(dirname(__file__), "CubicInterpolationCUDA", "code"), + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None): """ Creates a kernel function from an abstract syntax tree which @@ -39,23 +46,28 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen code += "#define FUNC_PREFIX __global__\n" code += "#define RESTRICT __restrict__\n\n" code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend)) - textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess)) + textures = set(d.interpolator for d in kernel_function_node.atoms( + InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField)) nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"] if USE_FAST_MATH: nvcc_options.append("-use_fast_math") - # Code for - # if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures): - # assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \ - # "Submodule CubicInterpolationCUDA does not exist" - # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] - # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] - - # needed_dims = set(t.field.spatial_dimensions for t in textures - # if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE) - # for i in needed_dims: - # code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code + # Code for CubicInterpolationCUDA + from pystencils.interpolation_astnodes import InterpolationMode + from os.path import join, dirname, isdir + + if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures): + assert isdir(join(dirname(__file__), ("CubicInterpolationCUDA", "code")), + "Submodule CubicInterpolationCUDA does not exist.\n" + + "Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda") + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + needed_dims = set(t.field.spatial_dimensions for t in textures + if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE) + for i in needed_dims: + code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code mod = SourceModule(code, options=nvcc_options, include_dirs=[ get_pystencils_include_path(), get_pycuda_include_path()]) diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py index d29d862929530b27f840da445467a2bae44b1bfb..ff3db430fc841f4121b9602b57a1c1aa08ef86f9 100644 --- a/pystencils/gpucuda/texture_utils.py +++ b/pystencils/gpucuda/texture_utils.py @@ -15,6 +15,7 @@ import numpy as np try: import pycuda.driver as cuda from pycuda import gpuarray + import pycuda except Exception: pass @@ -35,6 +36,8 @@ def ndarray_to_tex(tex_ref, use_normalized_coordinates=False, read_as_integer=False): + if isinstance(address_mode, str): + address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) if address_mode is None: address_mode = cuda.address_mode.BORDER if filter_mode is None: diff --git a/pystencils/interpolation_astnodes.py b/pystencils/interpolation_astnodes.py index 62d291b08e634cc0ad57eb37dca7a779581d88e1..76bd340b7e2c9bd66d9c82f77bc0399208017e52 100644 --- a/pystencils/interpolation_astnodes.py +++ b/pystencils/interpolation_astnodes.py @@ -35,6 +35,27 @@ class InterpolationMode(str, Enum): CUBIC_SPLINE = "cubic_spline" +class _InterpolationSymbol(TypedSymbol): + + def __new__(cls, name, field, interpolator): + obj = cls.__xnew_cached_(cls, name, field, interpolator) + return obj + + def __new_stage2__(cls, name, field, interpolator): + obj = super().__xnew__(cls, name, 'dummy_symbol_carrying_field' + field.name) + obj.field = field + obj.interpolator = interpolator + return obj + + def __getnewargs__(self): + return self.name, self.field, self.interpolator + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + + class Interpolator(object): """ Implements non-integer accesses on fields using linear interpolation. @@ -77,24 +98,25 @@ class Interpolator(object): allow_textures=True): super().__init__() - self.field = parent_field.new_field_with_different_name(parent_field.name) + self.field = parent_field self.field.field_type = pystencils.field.FieldType.CUSTOM self.address_mode = address_mode self.use_normalized_coordinates = use_normalized_coordinates - hash_str = "%x" % abs(hash(self.field) + hash(address_mode)) - self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str, - 'dummy_symbol_carrying_field' + self.field.name + hash_str) - self.symbol.field = self.field - self.symbol.interpolator = self - self.allow_textures = allow_textures self.interpolation_mode = interpolation_mode + self.hash_str = hashlib.md5( + f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest() + self.symbol = _InterpolationSymbol(str(self), parent_field, self) + self.allow_textures = allow_textures + + @property + def ndim(self): + return self.field.ndim @property def _hashable_contents(self): return (str(self.address_mode), str(type(self)), - self.symbol, - self.address_mode, + self.hash_str, self.use_normalized_coordinates) def at(self, offset): @@ -112,6 +134,9 @@ class Interpolator(object): def __hash__(self): return hash(self._hashable_contents) + def __eq__(self, other): + return hash(self) == hash(other) + @property def reproducible_hash(self): return _hash(str(self._hashable_contents).encode()).hexdigest() @@ -142,29 +167,43 @@ class NearestNeightborInterpolator(Interpolator): class InterpolatorAccess(TypedSymbol): - def __new__(cls, field, offsets, *args, **kwargs): - obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs) + def __new__(cls, field, *offsets, **kwargs): + obj = InterpolatorAccess.__xnew_cached_(cls, field, *offsets, **kwargs) return obj - def __new_stage2__(self, symbol, *offsets): + def __new_stage2__(cls, symbol, *offsets): assert offsets is not None - obj = super().__xnew__(self, '%s_interpolator_%x' % - (symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype) + obj = super().__xnew__(cls, '%s_interpolator_%s' % + (symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()), + symbol.field.dtype) obj.offsets = offsets obj.symbol = symbol obj.field = symbol.field obj.interpolator = symbol.interpolator return obj - def __hash__(self): - return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator)) + def _hashable_contents(self): + return super()._hashable_content() + ((self.symbol, self.field, tuple(self.offsets), self.symbol.interpolator)) def __str__(self): - return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets)) + return '%s_interpolator(%s)' % (self.field.name, ', '.join(str(o) for o in self.offsets)) def __repr__(self): return self.__str__() + def _latex(self, printer, *_): + n = self.field.latex_name if self.field.latex_name else self.field.name + foo = ", ".join(str(printer.doprint(o)) for o in self.offsets) + return f'{n}_{{interpolator}}\\left({foo}\\right)' + + @property + def ndim(self): + return len(self.offsets) + + @property + def is_texture(self): + return isinstance(self.interpolator, TextureCachedField) + def atoms(self, *types): if self.offsets: offsets = set(o for o in self.offsets if isinstance(o, types)) @@ -177,6 +216,11 @@ class InterpolatorAccess(TypedSymbol): else: return set() + def neighbor(self, coord_id, offset): + offset_list = list(self.offsets) + offset_list[coord_id] += offset + return self.interpolator.at(tuple(offset_list)) + @property def free_symbols(self): symbols = set() @@ -189,6 +233,13 @@ class InterpolatorAccess(TypedSymbol): return symbols + @property + def required_global_declarations(self): + required_global_declarations = self.symbol.interpolator.required_global_declarations + if required_global_declarations: + required_global_declarations[0]._symbols_defined.add(self) + return required_global_declarations + @property def args(self): return [self.symbol, *self.offsets] @@ -201,14 +252,27 @@ class InterpolatorAccess(TypedSymbol): def interpolation_mode(self): return self.interpolator.interpolation_mode + @property + def _diff_interpolation_vec(self): + return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets) + for i in range(len(self.offsets))]) + + def diff(self, *symbols, **kwargs): + if symbols == (self,): + return 1 + rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs) + if rtn.shape == (1, 1): + rtn = rtn[0, 0] + return rtn + def implementation_with_stencils(self): field = self.field default_int_type = create_type('int64') - use_textures = isinstance(self, TextureAccess) + use_textures = isinstance(self.interpolator, TextureCachedField) if use_textures: def absolute_access(x, _): - return self.texture.at((o for o in x)) + return self.symbol.interpolator.at((o for o in x)) else: absolute_access = field.absolute_access @@ -255,7 +319,7 @@ class InterpolatorAccess(TypedSymbol): for (dim, i) in enumerate(index)] index = [cast_func(sp.Piecewise((i, i > 0), (sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)), - True)), default_int_type) + True)), default_int_type) for (dim, i) in enumerate(index)] sum[channel_idx] += weight * \ absolute_access(index, channel_idx if field.index_dimensions else ()) @@ -287,12 +351,61 @@ class InterpolatorAccess(TypedSymbol): # noinspection SpellCheckingInspection __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + def __getnewargs__(self): + return tuple(self.symbol, *self.offsets) + + +class DiffInterpolatorAccess(InterpolatorAccess): + def __new__(cls, symbol, diff_coordinate_idx, *offsets, **kwargs): + if symbol.interpolator.interpolation_mode == InterpolationMode.LINEAR: + from pystencils.fd import Diff, Discretization2ndOrder + return Discretization2ndOrder(1)(Diff(symbol.interpolator.at(offsets), diff_coordinate_idx)) + obj = DiffInterpolatorAccess.__xnew_cached_(cls, symbol, diff_coordinate_idx, *offsets, **kwargs) + return obj + + def __new_stage2__(self, symbol: sp.Symbol, diff_coordinate_idx, *offsets): + assert offsets is not None + obj = super().__xnew__(self, symbol, *offsets) + obj.diff_coordinate_idx = diff_coordinate_idx + return obj + + def __hash__(self): + return hash((self.symbol, self.field, self.diff_coordinate_idx, tuple(self.offsets), self.interpolator)) + + def __str__(self): + return '%s_diff%i_interpolator(%s)' % (self.field.name, self.diff_coordinate_idx, + ', '.join(str(o) for o in self.offsets)) + + def __repr__(self): + return str(self) + + @property + def args(self): + return [self.symbol, self.diff_coordinate_idx, *self.offsets] + + @property + def symbols_defined(self) -> Set[sp.Symbol]: + return {self} + + @property + def interpolation_mode(self): + return self.interpolator.interpolation_mode + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + + def __getnewargs__(self): + return tuple(self.symbol, self.diff_coordinate_idx, *self.offsets) + + ########################################################################################## # GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols # ########################################################################################## -class TextureCachedField: +class TextureCachedField(Interpolator): def __init__(self, parent_field, address_mode=None, @@ -301,28 +414,19 @@ class TextureCachedField: use_normalized_coordinates=False, read_as_integer=False ): - if isinstance(address_mode, str): - address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) + super().__init__(parent_field, interpolation_mode, address_mode, use_normalized_coordinates) if address_mode is None: - address_mode = pycuda.driver.address_mode.BORDER + address_mode = 'border' if filter_mode is None: filter_mode = pycuda.driver.filter_mode.LINEAR - # self, field_name, field_type, dtype, layout, shape, strides - self.field = parent_field - self.address_mode = address_mode - self.filter_mode = filter_mode self.read_as_integer = read_as_integer - self.use_normalized_coordinates = use_normalized_coordinates - self.interpolation_mode = interpolation_mode - self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype) - self.symbol.interpolator = self - self.symbol.field = self.field self.required_global_declarations = [TextureDeclaration(self)] - # assert str(self.field.dtype) != 'double', "CUDA does not support double textures!" - # assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less" + @property + def ndim(self): + return self.field.ndim @classmethod def from_interpolator(cls, interpolator: LinearInterpolator): @@ -332,59 +436,17 @@ class TextureCachedField: obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode) return obj - def at(self, offset): - return TextureAccess(self.symbol, *offset) - - def __getitem__(self, offset): - return TextureAccess(self.symbol, *offset) - def __str__(self): return '%s_texture_%s' % (self.field.name, self.reproducible_hash) def __repr__(self): return self.__str__() - @property - def _hashable_contents(self): - return (type(self), - self.address_mode, - self.filter_mode, - self.read_as_integer, - self.interpolation_mode, - self.use_normalized_coordinates) - - def __hash__(self): - return hash(self._hashable_contents) - @property def reproducible_hash(self): return _hash(str(self._hashable_contents).encode()).hexdigest() -class TextureAccess(InterpolatorAccess): - def __new__(cls, texture_symbol, offsets, *args, **kwargs): - obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs) - return obj - - def __new_stage2__(self, symbol, *offsets): - obj = super().__xnew__(self, symbol, *offsets) - obj.required_global_declarations = symbol.interpolator.required_global_declarations - obj.required_global_declarations[0]._symbols_defined.add(obj) - return obj - - def __str__(self): - return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets)) - - @property - def texture(self): - return self.interpolator - - # noinspection SpellCheckingInspection - __xnew__ = staticmethod(__new_stage2__) - # noinspection SpellCheckingInspection - __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) - - class TextureDeclaration(Node): """ A global declaration of a texture. Visible both for device and host code. @@ -421,12 +483,18 @@ class TextureDeclaration(Node): @property def headers(self): - return ['"pycuda-helpers.hpp"'] + headers = ['"pycuda-helpers.hpp"'] + if self.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + headers.append('"cubicTex%iD.cu"' % self.texture.ndim) + return headers def __str__(self): from pystencils.backends.cuda_backend import CudaBackend return CudaBackend()(self) + def __repr__(self): + return str(self) + class TextureObject(TextureDeclaration): """ diff --git a/pystencils/math_optimizations.py b/pystencils/math_optimizations.py index ad0114782e15977fb329baacfb6ad4b6bac1e33b..b9420318f1a0a5b7a4aa538c35b6866672282bea 100644 --- a/pystencils/math_optimizations.py +++ b/pystencils/math_optimizations.py @@ -44,3 +44,13 @@ def optimize_assignments(assignments, optimizations): a.optimize(optimizations) return assignments + + +def optimize_ast(ast, optimizations): + + if HAS_REWRITING: + assignments_nodes = ast.atoms(SympyAssignment) + for a in assignments_nodes: + a.optimize(optimizations) + + return ast diff --git a/pystencils/simp/assignment_collection.py b/pystencils/simp/assignment_collection.py index 22968eb72361b15dd24a0ebc72fd284ddeddd2b7..706008c859bd0cdc9f2f3b892557242a5aa01184 100644 --- a/pystencils/simp/assignment_collection.py +++ b/pystencils/simp/assignment_collection.py @@ -156,6 +156,9 @@ class AssignmentCollection: """See :func:`count_operations` """ return count_operations(self.all_assignments, only_type=None) + def atoms(self, *args): + return set().union(*[a.atoms(*args) for a in self.all_assignments]) + def dependent_symbols(self, symbols: Iterable[sp.Symbol]) -> Set[sp.Symbol]: """Returns all symbols that depend on one of the passed symbols. diff --git a/pystencils/transformations.py b/pystencils/transformations.py index bb089ccb91ea5dfd678d69d6990981fdd28793fc..448991a842f2bd9cee6a47fc559fe16bbf8996ed 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -14,8 +14,8 @@ import pystencils.astnodes as ast import pystencils.integer_functions from pystencils.assignment import Assignment from pystencils.data_types import ( - PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type, - get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func) + PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, + get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func) from pystencils.field import AbstractField, Field, FieldType from pystencils.kernelparameters import FieldPointerSymbol from pystencils.simp.assignment_collection import AssignmentCollection @@ -1314,7 +1314,7 @@ def implement_interpolations(ast_node: ast.Node, implement_by_texture_accesses: bool = False, vectorize: bool = False, use_hardware_interpolation_for_f32=True): - from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField + from pystencils.interpolation_astnodes import (InterpolatorAccess, TextureCachedField) # TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields assert not(implement_by_texture_accesses and vectorize), \ @@ -1322,37 +1322,42 @@ def implement_interpolations(ast_node: ast.Node, FLOAT32_T = create_type('float32') interpolation_accesses = ast_node.atoms(InterpolatorAccess) + if not interpolation_accesses: + return ast_node def can_use_hw_interpolation(i): - return use_hardware_interpolation_for_f32 and i.dtype == FLOAT32_T and isinstance(i, TextureAccess) + return (use_hardware_interpolation_for_f32 + and implement_by_texture_accesses + and i.dtype == FLOAT32_T + and isinstance(i.symbol.interpolator, TextureCachedField)) if implement_by_texture_accesses: - interpolators = {a.symbol.interpolator for a in interpolation_accesses} - to_texture_map = {i: TextureCachedField.from_interpolator(i) for i in interpolators} + for i in interpolation_accesses: + from pystencils.interpolation_astnodes import _InterpolationSymbol - substitutions = {i: to_texture_map[i.symbol.interpolator].at( - [o for o in i.offsets]) for i in interpolation_accesses} - - try: - import pycuda.driver as cuda - for texture in substitutions.values(): - if can_use_hw_interpolation(texture): + try: + import pycuda.driver as cuda + texture = TextureCachedField.from_interpolator(i.interpolator) + if can_use_hw_interpolation(i): texture.filter_mode = cuda.filter_mode.LINEAR else: texture.filter_mode = cuda.filter_mode.POINT texture.read_as_integer = True - except Exception: - pass + except Exception as e: + raise e + i.symbol = _InterpolationSymbol(str(texture), i.symbol.field, texture) - if isinstance(ast_node, AssignmentCollection): - ast_node = ast_node.subs(substitutions) - else: - ast_node.subs(substitutions) + # from pystencils.math_optimizations import ReplaceOptim, optimize_ast - # Update after replacements - interpolation_accesses = ast_node.atoms(InterpolatorAccess) + # ImplementInterpolationByStencils = ReplaceOptim(lambda e: isinstance(e, InterpolatorAccess) + # and not can_use_hw_interpolation(i), + # lambda e: e.implementation_with_stencils() + # ) + # RemoveConjugate = ReplaceOptim(lambda e: isinstance(e, sp.conjugate), + # lambda e: e.args[0] + # ) if vectorize: # TODO can be done in _interpolator_access_to_stencils field.absolute_access == simd_gather raise NotImplementedError() diff --git a/pystencils_tests/test_interpolation.py b/pystencils_tests/test_interpolation.py index 4ead7a0fc2b489eb874abc7e9fde8cb043d5d770..b35427f51619ecad198a7a512c2744204c3c43dd 100644 --- a/pystencils_tests/test_interpolation.py +++ b/pystencils_tests/test_interpolation.py @@ -110,52 +110,45 @@ def test_rotate_interpolation(address_mode): pyconrad.imshow(out, "out " + address_mode) -@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror']) -def test_rotate_interpolation_gpu(address_mode): +@pytest.mark.parametrize('dtype', (np.int32, np.float32, np.float64)) +@pytest.mark.parametrize('address_mode', ('border', 'wrap', 'clamp', 'mirror')) +@pytest.mark.parametrize('use_textures', ('use_textures', False)) +def test_rotate_interpolation_gpu(dtype, address_mode, use_textures): rotation_angle = sympy.pi / 5 scale = 1 - previous_result = None - for dtype in [np.int32, np.float32, np.float64]: - if dtype == np.int32: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna * 255, dtype)) - else: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna, dtype)) - for use_textures in [True, False]: - x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) + if dtype == np.int32: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna * 255, dtype)) + else: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna, dtype)) + x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) - transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * \ - sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2]) - assignments = pystencils.AssignmentCollection({ - y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) - }) - print(assignments) - ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) - print(ast) - pystencils.show_code(ast) - kernel = ast.compile() + transformed = scale * \ + sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2]) + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + print(assignments) + ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() - out = gpuarray.zeros_like(lenna_gpu) - kernel(x=lenna_gpu, y=out) - pyconrad.imshow(out, - f"out {address_mode} texture:{use_textures} {type_map[dtype]}") - skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", - np.ascontiguousarray(out.get(), np.float32)) - if previous_result is not None: - try: - assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3) - except AssertionError: # NOQA - print("Max error: %f" % np.max(previous_result - out.get())) - # pyconrad.imshow(previous_result - out.get(), "Difference image") - # raise e - previous_result = out.get() + out = gpuarray.zeros_like(lenna_gpu) + kernel(x=lenna_gpu, y=out) + pyconrad.imshow(out, + f"out {address_mode} texture:{use_textures} {type_map[dtype]}") + skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", + np.ascontiguousarray(out.get(), np.float32)) @pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror']) -def test_shift_interpolation_gpu(address_mode): +@pytest.mark.parametrize('dtype', [np.float64, np.float32, np.int32]) +@pytest.mark.parametrize('use_textures', ('use_textures', False,)) +def test_shift_interpolation_gpu(address_mode, dtype, use_textures): rotation_angle = 0 # sympy.pi / 5 scale = 1 @@ -218,7 +211,7 @@ def test_rotate_interpolation_size_change(address_mode): @pytest.mark.parametrize('address_mode, target', - itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu', 'gpu'])) + itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu'])) def test_field_interpolated(address_mode, target): x_f, y_f = pystencils.fields('x,y: float64 [2d]') @@ -226,7 +219,7 @@ def test_field_interpolated(address_mode, target): y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode) }) print(assignments) - ast = pystencils.create_kernel(assignments) + ast = pystencils.create_kernel(assignments, target=target) print(ast) pystencils.show_code(ast) kernel = ast.compile() @@ -234,5 +227,11 @@ def test_field_interpolated(address_mode, target): out = np.zeros_like(lenna) kernel(x=lenna, y=out) pyconrad.imshow(out, "out " + address_mode) - kernel(x=lenna, y=out) - pyconrad.imshow(out, "out " + address_mode) + + +def test_spatial_derivative(): + x, y = pystencils.fields('x, y: float32[2d]') + tx, ty = pystencils.fields('t_x, t_y: float32[2d]') + + diff = sympy.diff(x.interpolated_access((tx.center, ty.center)), tx.center) + print("diff: " + str(diff)) diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py index 42c53336749c11c02bff612522c53905c4e2d4c5..ac9aaad9f8584fead6fa4ea198cd6c7419226139 100644 --- a/pystencils_tests/test_opencl.py +++ b/pystencils_tests/test_opencl.py @@ -5,11 +5,13 @@ import sympy as sp import pystencils from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.opencl_backend import OpenClBackend -from pystencils.opencl.opencljit import get_global_cl_queue, init_globally, make_python_function +from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function try: import pyopencl as cl HAS_OPENCL = True + import pystencils.opencl.autoinit + except Exception: HAS_OPENCL = False @@ -244,9 +246,6 @@ def test_kernel_creation(): print(assignments) - pystencils.opencl.clear_global_ctx() - - import pystencils.opencl.autoinit ast = pystencils.create_kernel(assignments, target='opencl') print(ast.backend) diff --git a/pystencils_tests/test_sympy_optimizations.py b/pystencils_tests/test_sympy_optimizations.py index 0899d5b10cd87ff1e6acc630c8c88ea865005be5..b22ad9d7fb07c84b88388455b45e76f6b5decc3a 100644 --- a/pystencils_tests/test_sympy_optimizations.py +++ b/pystencils_tests/test_sympy_optimizations.py @@ -16,6 +16,7 @@ def test_sympy_optimizations(): }) assignments = optimize_assignments(assignments, optims_pystencils_cpu) + print(assignments) ast = pystencils.create_kernel(assignments, target=target) code = pystencils.get_code_str(ast)