diff --git a/pystencils/__init__.py b/pystencils/__init__.py index a402a4d9638e92c878bcc8773108ce6c8664c70d..ed08ff1327c68a1a116edb21547a86f8b27a8c81 100644 --- a/pystencils/__init__.py +++ b/pystencils/__init__.py @@ -12,6 +12,8 @@ from .kernelcreation import create_indexed_kernel, create_kernel, create_stagger from .simp import AssignmentCollection from .slicing import make_slice from .sympyextensions import SymbolCreator +from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector, + y_, y_staggered, z_, z_staggered) try: import pystencils_autodiff @@ -30,5 +32,8 @@ __all__ = ['Field', 'FieldType', 'fields', 'SymbolCreator', 'create_data_handling', 'kernel', + 'x_', 'y_', 'z_', + 'x_staggered', 'y_staggered', 'z_staggered', + 'x_vector', 'x_staggered_vector', 'fd', 'stencil'] diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index 67ad6869fa6b98b9b807940a742afb7d9544097a..5cb8c8d3de7a7118d7696c33298e544c09b38db3 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -1,3 +1,5 @@ +import collections.abc +import itertools import uuid from typing import Any, List, Optional, Sequence, Set, Union @@ -33,7 +35,7 @@ class Node: raise NotImplementedError() def subs(self, subs_dict) -> None: - """Inplace! substitute, similar to sympy's but modifies the AST inplace.""" + """Inplace! Substitute, similar to sympy's but modifies the AST inplace.""" for a in self.args: a.subs(subs_dict) @@ -102,7 +104,8 @@ class Conditional(Node): result = self.true_block.undefined_symbols if self.false_block: result.update(self.false_block.undefined_symbols) - result.update(self.condition_expr.atoms(sp.Symbol)) + if hasattr(self.condition_expr, 'atoms'): + result.update(self.condition_expr.atoms(sp.Symbol)) return result def __str__(self): @@ -212,9 +215,16 @@ class KernelFunction(Node): """Set of Field instances: fields which are accessed inside this kernel function""" return set(o.field for o in self.atoms(ResolvedFieldAccess)) - def fields_written(self): - assigments = self.atoms(SympyAssignment) - return {a.lhs.field for a in assigments if isinstance(a.lhs, ResolvedFieldAccess)} + @property + def fields_written(self) -> Set['ResolvedFieldAccess']: + assignments = self.atoms(SympyAssignment) + return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)} + + @property + def fields_read(self) -> Set['ResolvedFieldAccess']: + assignments = self.atoms(SympyAssignment) + return set().union(itertools.chain.from_iterable([f.field for f in a.rhs.free_symbols if hasattr(f, 'field')] + for a in assignments)) def get_parameters(self) -> Sequence['KernelFunction.Parameter']: """Returns list of parameters for this function. @@ -283,8 +293,15 @@ class Block(Node): a.subs(subs_dict) def insert_front(self, node): - node.parent = self - self._nodes.insert(0, node) + if isinstance(node, collections.abc.Iterable): + node = list(node) + for n in node: + n.parent = self + + self._nodes = node + self._nodes + else: + node.parent = self + self._nodes.insert(0, node) def insert_before(self, new_node, insert_before): new_node.parent = self @@ -485,7 +502,7 @@ class SympyAssignment(Node): def __init__(self, lhs_symbol, rhs_expr, is_const=True): super(SympyAssignment, self).__init__(parent=None) self._lhs_symbol = lhs_symbol - self.rhs = rhs_expr + self.rhs = sp.simplify(rhs_expr) self._is_const = is_const self._is_declaration = self.__is_declaration() @@ -678,3 +695,49 @@ def early_out(condition): def get_dummy_symbol(dtype='bool'): return TypedSymbol('dummy%s' % uuid.uuid4().hex, create_type(dtype)) + + +class SourceCodeComment(Node): + def __init__(self, text): + self.text = text + + @property + def args(self): + return [] + + @property + def symbols_defined(self): + return set() + + @property + def undefined_symbols(self): + return set() + + def __str__(self): + return "/* " + self.text + " */" + + def __repr__(self): + return self.__str__() + + +class EmptyLine(Node): + def __init__(self): + pass + + @property + def args(self): + return [] + + @property + def symbols_defined(self): + return set() + + @property + def undefined_symbols(self): + return set() + + def __str__(self): + return "" + + def __repr__(self): + return self.__str__() diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index ec5a930fe9cba4fbe71b8cb5ff3086d4be216073..9f454168670baf2419b25cb7648edf46e40beebf 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -102,6 +102,10 @@ def get_headers(ast_node: Node) -> Set[str]: if isinstance(a, Node): headers.update(get_headers(a)) + for g in get_global_declarations(ast_node): + if isinstance(g, Node): + headers.update(get_headers(g)) + return sorted(headers) @@ -131,6 +135,12 @@ class CustomCodeNode(Node): def undefined_symbols(self): return self._symbols_read - self._symbols_defined + def __eq___(self, other): + return self._code == other._code + + def __hash__(self): + return hash(self._code) + class PrintNode(CustomCodeNode): # noinspection SpellCheckingInspection @@ -263,6 +273,12 @@ class CBackend: def _print_CustomCodeNode(self, node): return node.get_code(self._dialect, self._vector_instruction_set) + def _print_SourceCodeComment(self, node): + return "/* " + node.text + " */" + + def _print_EmptyLine(self, node): + return "" + def _print_Conditional(self, node): cond_type = get_type_of_expression(node.condition_expr) if isinstance(cond_type, VectorType): @@ -409,6 +425,7 @@ class CustomSympyPrinter(CCodePrinter): condition=self._print(var) + ' <= ' + self._print(end) # if start < end else '>=' ) return code + _print_Max = C89CodePrinter._print_Max _print_Min = C89CodePrinter._print_Min diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py index 3c1a3cd8bf9629c0b003cc8b6c38693c4426e282..8bc58468901a006bc7ef8f278e3e7a544234de52 100644 --- a/pystencils/backends/cuda_backend.py +++ b/pystencils/backends/cuda_backend.py @@ -3,6 +3,7 @@ from os.path import dirname, join from pystencils.astnodes import Node from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt +from pystencils.interpolation_astnodes import InterpolationMode with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f: lines = f.readlines() @@ -43,11 +44,19 @@ class CudaBackend(CBackend): return code def _print_TextureDeclaration(self, node): - code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % ( - str(node.texture.field.dtype), - node.texture.field.spatial_dimensions, - node.texture - ) + + if node.texture.field.dtype.numpy_dtype.itemsize > 4: + code = "texture<fp_tex_%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % ( + str(node.texture.field.dtype), + node.texture.field.spatial_dimensions, + node.texture + ) + else: + code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % ( + str(node.texture.field.dtype), + node.texture.field.spatial_dimensions, + node.texture + ) return code def _print_SkipIteration(self, _): @@ -62,17 +71,23 @@ class CudaSympyPrinter(CustomSympyPrinter): self.known_functions.update(CUDA_KNOWN_FUNCTIONS) def _print_TextureAccess(self, node): + dtype = node.texture.field.dtype.numpy_dtype - if node.texture.cubic_bspline_interpolation: - template = "cubicTex%iDSimple<%s>(%s, %s)" + if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + template = "cubicTex%iDSimple(%s, %s)" else: - template = "tex%iD<%s>(%s, %s)" + if dtype.itemsize > 4: + # Use PyCuda hack! + # https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp + template = "fp_tex%iD(%s, %s)" + else: + template = "tex%iD(%s, %s)" code = template % ( node.texture.field.spatial_dimensions, - str(node.texture.field.dtype), str(node.texture), - ', '.join(self._print(o) for o in node.offsets) + # + 0.5 comes from Nvidia's staggered indexing + ', '.join(self._print(o + 0.5) for o in reversed(node.offsets)) ) return code diff --git a/pystencils/backends/cuda_known_functions.txt b/pystencils/backends/cuda_known_functions.txt index 42cf554ada57d9055bddc659d53c4eaff664275a..224f4a49d65322446a6b17c696351f9371435887 100644 --- a/pystencils/backends/cuda_known_functions.txt +++ b/pystencils/backends/cuda_known_functions.txt @@ -45,6 +45,7 @@ tex1D tex2D tex3D +sqrtf rsqrtf cbrtf rcbrtf diff --git a/pystencils/backends/json.py b/pystencils/backends/json.py new file mode 100644 index 0000000000000000000000000000000000000000..ac2b2d4a055adb75e6d93272f25e6673e0b8ea6a --- /dev/null +++ b/pystencils/backends/json.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" +import json + +from pystencils.backends.cbackend import CustomSympyPrinter, generate_c + +try: + import toml +except Exception: + class toml: + def dumps(self, *args): + raise ImportError('toml not installed') + + def dump(self, *args): + raise ImportError('toml not installed') + +try: + import yaml +except Exception: + class yaml: + def dumps(self, *args): + raise ImportError('pyyaml not installed') + + def dump(self, *args): + raise ImportError('pyyaml not installed') + + +def expr_to_dict(expr_or_node, with_c_code=True, full_class_names=False): + + self = {'str': str(expr_or_node)} + if with_c_code: + try: + self.update({'c': generate_c(expr_or_node)}) + except Exception: + try: + self.update({'c': CustomSympyPrinter().doprint(expr_or_node)}) + except Exception: + pass + for a in expr_or_node.args: + self.update({str(a.__class__ if full_class_names else a.__class__.__name__): expr_to_dict(a)}) + + return self + + +def print_json(expr_or_node): + dict = expr_to_dict(expr_or_node) + return json.dumps(dict, indent=4) + + +def write_json(filename, expr_or_node): + dict = expr_to_dict(expr_or_node) + with open(filename, 'w') as f: + json.dump(dict, f, indent=4) + + +def print_toml(expr_or_node): + dict = expr_to_dict(expr_or_node, full_class_names=False) + return toml.dumps(dict) + + +def write_toml(filename, expr_or_node): + dict = expr_to_dict(expr_or_node) + with open(filename, 'w') as f: + toml.dump(dict, f) + + +def print_yaml(expr_or_node): + dict = expr_to_dict(expr_or_node, full_class_names=False) + return yaml.dump(dict) + + +def write_yaml(filename, expr_or_node): + dict = expr_to_dict(expr_or_node) + with open(filename, 'w') as f: + yaml.dump(dict, f) diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py index fc6765e589e341b40ebbc778c7c26839531ae826..7859c537fe3fbe50a2e94bb2c2c67622f1c1f852 100644 --- a/pystencils/cpu/kernelcreation.py +++ b/pystencils/cpu/kernelcreation.py @@ -10,8 +10,8 @@ from pystencils.data_types import BasicType, StructType, TypedSymbol, create_typ from pystencils.field import Field, FieldType from pystencils.transformations import ( add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering, - make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info, - resolve_buffer_accesses, resolve_field_accesses, split_inner_loop) + implement_interpolations, make_loop_over_domain, move_constants_before_loop, + parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, split_inner_loop) AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]] @@ -67,6 +67,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke ghost_layers=ghost_layers, loop_order=loop_order) ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function, ghost_layers=ghost_layer_info, function_name=function_name) + implement_interpolations(body) if split_groups: typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups] @@ -139,6 +140,8 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu loop_body = Block([]) loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0, start=0, stop=index_fields[0].shape[0]) + implement_interpolations(loop_node) + for assignment in assignments: loop_body.append(assignment) diff --git a/pystencils/data_types.py b/pystencils/data_types.py index 45099302b9615164fe329d21f8f85c631d45b1e3..186992534a01be2b2f9115876d641a8e3c9cea83 100644 --- a/pystencils/data_types.py +++ b/pystencils/data_types.py @@ -1,12 +1,15 @@ import ctypes from collections import defaultdict from functools import partial +from typing import Tuple import numpy as np import sympy as sp +import sympy.codegen.ast from sympy.core.cache import cacheit from sympy.logic.boolalg import Boolean +import pystencils from pystencils.cache import memorycache, memorycache_if_hashable from pystencils.utils import all_equal @@ -17,6 +20,26 @@ except ImportError as e: _ir_importerror = e +def typed_symbols(names, dtype, *args): + symbols = sp.symbols(names, *args) + if isinstance(symbols, Tuple): + return tuple(TypedSymbol(str(s), dtype) for s in symbols) + else: + return TypedSymbol(str(symbols), dtype) + + +def matrix_symbols(names, dtype, rows, cols): + if isinstance(names, str): + names = names.replace(' ', '').split(',') + + matrices = [] + for n in names: + symbols = typed_symbols("%s:%i" % (n, rows * cols), dtype) + matrices.append(sp.Matrix(rows, cols, lambda i, j: symbols[i * cols + j])) + + return tuple(matrices) + + # noinspection PyPep8Naming class address_of(sp.Function): is_Atom = True @@ -86,6 +109,11 @@ class cast_func(sp.Function): @property def is_integer(self): + """ + Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate + + For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html + """ if hasattr(self.dtype, 'numpy_dtype'): return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer else: @@ -93,6 +121,9 @@ class cast_func(sp.Function): @property def is_negative(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if hasattr(self.dtype, 'numpy_dtype'): if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger): return False @@ -101,6 +132,9 @@ class cast_func(sp.Function): @property def is_nonnegative(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if self.is_negative is False: return True else: @@ -108,6 +142,9 @@ class cast_func(sp.Function): @property def is_real(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if hasattr(self.dtype, 'numpy_dtype'): return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \ np.issubdtype(self.dtype.numpy_dtype, np.floating) or \ @@ -171,6 +208,11 @@ class TypedSymbol(sp.Symbol): # For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html @property def is_integer(self): + """ + Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate + + For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html + """ if hasattr(self.dtype, 'numpy_dtype'): return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer else: @@ -178,6 +220,9 @@ class TypedSymbol(sp.Symbol): @property def is_negative(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if hasattr(self.dtype, 'numpy_dtype'): if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger): return False @@ -186,6 +231,9 @@ class TypedSymbol(sp.Symbol): @property def is_nonnegative(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if self.is_negative is False: return True else: @@ -193,6 +241,9 @@ class TypedSymbol(sp.Symbol): @property def is_real(self): + """ + See :func:`.TypedSymbol.is_integer` + """ if hasattr(self.dtype, 'numpy_dtype'): return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \ np.issubdtype(self.dtype.numpy_dtype, np.floating) or \ @@ -370,12 +421,17 @@ def peel_off_type(dtype, type_to_peel_off): return dtype -def collate_types(types): +def collate_types(types, forbid_collation_to_float=False): """ Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double Uses the collation rules from numpy. """ + if forbid_collation_to_float: + types = [t for t in types if not (hasattr(t, 'is_float') and t.is_float())] + if not types: + return [create_type('int32')] + # Pointer arithmetic case i.e. pointer + integer is allowed if any(type(t) is PointerType for t in types): pointer_type = None @@ -433,6 +489,8 @@ def get_type_of_expression(expr, return create_type(default_float_type) elif isinstance(expr, ResolvedFieldAccess): return expr.field.dtype + elif isinstance(expr, pystencils.field.Field.AbstractAccess): + return expr.field.dtype elif isinstance(expr, TypedSymbol): return expr.dtype elif isinstance(expr, sp.Symbol): @@ -525,6 +583,10 @@ class BasicType(Type): def numpy_dtype(self): return self._dtype + @property + def sympy_dtype(self): + return getattr(sympy.codegen.ast, str(self.numpy_dtype)) + @property def item_size(self): return 1 diff --git a/pystencils/field.py b/pystencils/field.py index b6437a971398c70c0bcf392129269b87c912f0b8..128ba9df135958a37fbe20179318d646c9b7d89f 100644 --- a/pystencils/field.py +++ b/pystencils/field.py @@ -1,4 +1,6 @@ +import functools import hashlib +import operator import pickle import re from enum import Enum @@ -9,6 +11,7 @@ import numpy as np import sympy as sp from sympy.core.cache import cacheit +import pystencils from pystencils.alignedarray import aligned_empty from pystencils.data_types import StructType, TypedSymbol, create_type from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol @@ -38,7 +41,6 @@ def fields(description=None, index_dimensions=0, layout=None, **kwargs): >>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype >>> assert v.index_shape == (2,) - Format string can be left out, field names are taken from keyword arguments. >>> fields(f1=arr_s, f2=arr_s) [f1, f2] @@ -292,6 +294,10 @@ class Field(AbstractField): self.shape = shape self.strides = strides self.latex_name = None # type: Optional[str] + self.coordinate_origin = sp.Matrix(tuple( + 0 for _ in range(self.spatial_dimensions) + )) # type: tuple[float,sp.Symbol] + self.coordinate_transform = sp.eye(self.spatial_dimensions) def new_field_with_different_name(self, new_name): if self.has_fixed_shape: @@ -312,6 +318,9 @@ class Field(AbstractField): def ndim(self) -> int: return len(self.shape) + def values_per_cell(self) -> int: + return functools.reduce(operator.mul, self.index_shape, 1) + @property def layout(self): return self._layout @@ -393,6 +402,27 @@ class Field(AbstractField): assert FieldType.is_custom(self) return Field.Access(self, offset, index, is_absolute_access=True) + def interpolated_access(self, + offset: Tuple, + interpolation_mode='linear', + address_mode='BORDER', + allow_textures=True): + """Provides access to field values at non-integer positions + + ``interpolated_access`` is similar to :func:`Field.absolute_access` except that + it allows non-integer offsets and automatic handling of out-of-bound accesses. + + :param offset: Tuple of spatial coordinates (can be floats) + :param interpolation_mode: One of :class:`pystencils.interpolation_astnodes.InterpolationMode` + :param address_mode: How boundaries are handled can be 'border', 'wrap', 'mirror', 'clamp' + :param allow_textures: Allow implementation by texture accesses on GPUs + """ + from pystencils.interpolation_astnodes import Interpolator + return Interpolator(self, + interpolation_mode, + address_mode, + allow_textures=allow_textures).at(offset) + def __call__(self, *args, **kwargs): center = tuple([0] * self.spatial_dimensions) return Field.Access(self, center)(*args, **kwargs) @@ -409,6 +439,34 @@ class Field(AbstractField): return False return self.hashable_contents() == other.hashable_contents() + @property + def physical_coordinates(self): + return self.coordinate_transform @ (self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions)) + + @property + def physical_coordinates_staggered(self): + return self.coordinate_transform @ \ + (self.coordinate_origin + pystencils.x_staggered_vector(self.spatial_dimensions)) + + def index_to_physical(self, index_coordinates, staggered=False): + if staggered: + index_coordinates = sp.Matrix([i + 0.5 for i in index_coordinates]) + return self.coordinate_transform @ (self.coordinate_origin + index_coordinates) + + def physical_to_index(self, physical_coordinates, staggered=False): + rtn = self.coordinate_transform.inv() @ physical_coordinates - self.coordinate_origin + if staggered: + rtn = sp.Matrix([i - 0.5 for i in rtn]) + + return rtn + + def index_to_staggered_physical_coordinates(self, symbol_vector): + symbol_vector += sp.Matrix([0.5] * self.spatial_dimensions) + return self.create_physical_coordinates(symbol_vector) + + def set_coordinate_origin_to_field_center(self): + self.coordinate_origin = -sp.Matrix([i / 2 for i in self.spatial_shape]) + # noinspection PyAttributeOutsideInit,PyUnresolvedReferences class Access(TypedSymbol, AbstractField.AbstractAccess): """Class representing a relative access into a `Field`. @@ -429,11 +487,12 @@ class Field(AbstractField): >>> central_y_component.at_index(0) # change component v_C^0 """ + def __new__(cls, name, *args, **kwargs): obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs) return obj - def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False): + def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False, dtype=None): field_name = field.name offsets_and_index = (*offsets, *idx) if idx is not None else offsets constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index]) @@ -484,7 +543,7 @@ class Field(AbstractField): return obj def __getnewargs__(self): - return self.field, self.offsets, self.index, self.is_absolute_access + return self.field, self.offsets, self.index, self.is_absolute_access, self.dtype # noinspection SpellCheckingInspection __xnew__ = staticmethod(__new_stage2__) @@ -503,7 +562,7 @@ class Field(AbstractField): if len(idx) != self.field.index_dimensions: raise ValueError("Wrong number of indices: " "Got %d, expected %d" % (len(idx), self.field.index_dimensions)) - return Field.Access(self.field, self._offsets, idx) + return Field.Access(self.field, self._offsets, idx, dtype=self.dtype) def __getitem__(self, *idx): return self.__call__(*idx) @@ -562,7 +621,7 @@ class Field(AbstractField): """ offset_list = list(self.offsets) offset_list[coord_id] += offset - return Field.Access(self.field, tuple(offset_list), self.index) + return Field.Access(self.field, tuple(offset_list), self.index, dtype=self.dtype) def get_shifted(self, *shift) -> 'Field.Access': """Returns a new Access with changed spatial coordinates @@ -572,7 +631,10 @@ class Field(AbstractField): >>> f[0,0].get_shifted(1, 1) f_NE """ - return Field.Access(self.field, tuple(a + b for a, b in zip(shift, self.offsets)), self.index) + return Field.Access(self.field, + tuple(a + b for a, b in zip(shift, self.offsets)), + self.index, + dtype=self.dtype) def at_index(self, *idx_tuple) -> 'Field.Access': """Returns new Access with changed index. @@ -582,7 +644,7 @@ class Field(AbstractField): >>> f(0).at_index(8) f_C^8 """ - return Field.Access(self.field, self.offsets, idx_tuple) + return Field.Access(self.field, self.offsets, idx_tuple, dtype=self.dtype) @property def is_absolute_access(self) -> bool: diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py index 92738e1e7df9c35dca87ff5f3de84ceeb5bcad8a..e38290338236c16231070e13d11a658a5223bc71 100644 --- a/pystencils/gpucuda/cudajit.py +++ b/pystencils/gpucuda/cudajit.py @@ -3,7 +3,9 @@ import numpy as np from pystencils.backends.cbackend import generate_c, get_headers from pystencils.data_types import StructType from pystencils.field import FieldType -from pystencils.include import get_pystencils_include_path +from pystencils.gpucuda.texture_utils import ndarray_to_tex +from pystencils.include import get_pycuda_include_path, get_pystencils_include_path +from pystencils.interpolation_astnodes import TextureAccess from pystencils.kernelparameters import FieldPointerSymbol USE_FAST_MATH = True @@ -29,17 +31,33 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen if argument_dict is None: argument_dict = {} - header_list = ['<stdint.h>'] + list(get_headers(kernel_function_node)) + header_list = ['<cstdint>'] + list(get_headers(kernel_function_node)) includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) code = includes + "\n" code += "#define FUNC_PREFIX __global__\n" code += "#define RESTRICT __restrict__\n\n" code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend)) - options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"] + textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess)) + + nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"] if USE_FAST_MATH: - options.append("-use_fast_math") - mod = SourceModule(code, options=options, include_dirs=[get_pystencils_include_path()]) + nvcc_options.append("-use_fast_math") + + # Code for + # if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures): + # assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \ + # "Submodule CubicInterpolationCUDA does not exist" + # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] + # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + # needed_dims = set(t.field.spatial_dimensions for t in textures + # if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE) + # for i in needed_dims: + # code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code + + mod = SourceModule(code, options=nvcc_options, include_dirs=[ + get_pystencils_include_path(), get_pycuda_include_path()]) func = mod.get_function(kernel_function_node.function_name) parameters = kernel_function_node.get_parameters() @@ -63,6 +81,12 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block']) block_and_thread_numbers['grid'] = tuple(int(i) for i in block_and_thread_numbers['grid']) + # TODO: use texture objects: + # https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/ + for tex in textures: + tex_ref = mod.get_texref(str(tex)) + ndarray_to_tex(tex_ref, full_arguments[tex.field.name], tex.address_mode, + tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer) args = _build_numpy_argument_list(parameters, full_arguments) cache[key] = (args, block_and_thread_numbers) cache_values.append(kwargs) # keep objects alive such that ids remain unique diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py index ff82107000b00595dbcca6699ed42b172c324353..ed916d54b8d4434e0e38beb68dd8ecb933aff567 100644 --- a/pystencils/gpucuda/kernelcreation.py +++ b/pystencils/gpucuda/kernelcreation.py @@ -4,12 +4,18 @@ from pystencils.field import Field, FieldType from pystencils.gpucuda.cudajit import make_python_function from pystencils.gpucuda.indexing import BlockIndexing from pystencils.transformations import ( - add_types, get_base_buffer_index, get_common_shape, parse_base_pointer_info, - resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols) - - -def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing, - iteration_slice=None, ghost_layers=None, skip_independence_check=False): + add_types, get_base_buffer_index, get_common_shape, implement_interpolations, + parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols) + + +def create_cuda_kernel(assignments, + function_name="kernel", + type_info=None, + indexing_creator=BlockIndexing, + iteration_slice=None, + ghost_layers=None, + skip_independence_check=False, + use_textures_for_interpolation=True): fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check) all_fields = fields_read.union(fields_written) read_only_fields = set([f.name for f in fields_read - fields_written]) @@ -57,6 +63,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde ast = KernelFunction(block, 'gpu', 'gpucuda', make_python_function, ghost_layers, function_name) ast.global_variables.update(indexing.index_variables) + implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation) + base_pointer_spec = [['spatialInner0']] base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0], f.spatial_dimensions, f.index_dimensions) @@ -86,8 +94,13 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde return ast -def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None, - coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing): +def created_indexed_cuda_kernel(assignments, + index_fields, + function_name="kernel", + type_info=None, + coordinate_names=('x', 'y', 'z'), + indexing_creator=BlockIndexing, + use_textures_for_interpolation=True): fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False) all_fields = fields_read.union(fields_written) read_only_fields = set([f.name for f in fields_read - fields_written]) @@ -125,6 +138,8 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel ast = KernelFunction(function_body, 'gpu', 'gpucuda', make_python_function, None, function_name) ast.global_variables.update(indexing.index_variables) + implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation) + coord_mapping = indexing.coordinates base_pointer_spec = [['spatialInner0']] base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0], diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d29d862929530b27f840da445467a2bae44b1bfb --- /dev/null +++ b/pystencils/gpucuda/texture_utils.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" + +from os.path import dirname, isdir, join + +import numpy as np + +try: + import pycuda.driver as cuda + from pycuda import gpuarray +except Exception: + pass + + +def pow_two_divider(n): + if n == 0: + return 0 + divider = 1 + while (n & divider) == 0: + divider <<= 1 + return divider + + +def ndarray_to_tex(tex_ref, + ndarray, + address_mode=None, + filter_mode=None, + use_normalized_coordinates=False, + read_as_integer=False): + + if address_mode is None: + address_mode = cuda.address_mode.BORDER + if filter_mode is None: + filter_mode = cuda.filter_mode.LINEAR + + if isinstance(ndarray, np.ndarray): + cu_array = cuda.np_to_array(ndarray, 'C') + elif isinstance(ndarray, gpuarray.GPUArray): + cu_array = cuda.gpuarray_to_array(ndarray, 'C') + else: + raise TypeError( + 'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray') + + cuda.TextureReference.set_array(tex_ref, cu_array) + + tex_ref.set_address_mode(0, address_mode) + if ndarray.ndim >= 2: + tex_ref.set_address_mode(1, address_mode) + if ndarray.ndim >= 3: + tex_ref.set_address_mode(2, address_mode) + tex_ref.set_filter_mode(filter_mode) + + if not use_normalized_coordinates: + tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES) + + if not read_as_integer: + tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER) + + +def prefilter_for_cubic_bspline(gpuarray): + import pycuda.autoinit # NOQA + from pycuda.compiler import SourceModule + + ndim = gpuarray.ndim + assert ndim == 2 or ndim == 3, "Only 2d or 3d supported" + assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \ + "Submodule CubicInterpolationCUDA does not exist" + nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"] + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + file_name = join(dirname(__file__), "CubicInterpolationCUDA", "code", "cubicPrefilter%iD.cu" % ndim) + with open(file_name) as file: + code = file.read() + + mod = SourceModule(code, options=nvcc_options) + + if ndim == 2: + height, width = gpuarray.shape + block = min(pow_two_divider(height), 64) + grid = height // block + func = mod.get_function('SamplesToCoefficients2DXf') + func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r) + for r in reversed(gpuarray.shape)), + block=(block, 1, 1), + grid=(grid, 1, 1)) + + block = min(pow_two_divider(width), 64) + grid = width // block + func = mod.get_function('SamplesToCoefficients2DYf') + func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r) + for r in reversed(gpuarray.shape)), + block=(block, 1, 1), + grid=(grid, 1, 1)) + elif ndim == 3: + depth, height, width = gpuarray.shape + dimX = min(min(pow_two_divider(width), pow_two_divider(height)), 64) + dimY = min(min(pow_two_divider(depth), pow_two_divider(height)), 512 / dimX) + block = (dimX, dimY, 1) + + dimGridX = (height // block[0], depth // block[1], 1) + dimGridY = (width // block[0], depth // block[1], 1) + dimGridZ = (width // block[0], height // block[1], 1) + + func = mod.get_function("SamplesToCoefficients3DXf") + func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r) + for r in reversed(gpuarray.shape)), + block=block, + grid=dimGridX) + func = mod.get_function("SamplesToCoefficients3DYf") + func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r) + for r in reversed(gpuarray.shape)), + block=block, + grid=dimGridY) + func = mod.get_function("SamplesToCoefficients3DZf") + func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r) + for r in reversed(gpuarray.shape)), + block=block, + grid=dimGridZ) diff --git a/pystencils/include/__init__.py b/pystencils/include/__init__.py index 03a19735db12d1de30f46104f85c0cb9f7f6da20..8ddff6a4a8f5f3ceda691cff850dc3377d98278c 100644 --- a/pystencils/include/__init__.py +++ b/pystencils/include/__init__.py @@ -1,5 +1,10 @@ -import os +from os.path import dirname, join, realpath def get_pystencils_include_path(): - return os.path.dirname(os.path.realpath(__file__)) + return dirname(realpath(__file__)) + + +def get_pycuda_include_path(): + import pycuda + return join(dirname(realpath(pycuda.__file__)), 'cuda') diff --git a/pystencils/interpolation_astnodes.py b/pystencils/interpolation_astnodes.py new file mode 100644 index 0000000000000000000000000000000000000000..51fa20f40f1a357960ed72b09526c7103c72c841 --- /dev/null +++ b/pystencils/interpolation_astnodes.py @@ -0,0 +1,426 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" + +import itertools +from enum import Enum +from typing import Set + +import sympy as sp +from sympy.core.cache import cacheit + +import pystencils +from pystencils.astnodes import Node +from pystencils.data_types import TypedSymbol, cast_func, create_type + +try: + import pycuda.driver +except Exception: + pass + + +class InterpolationMode(str, Enum): + NEAREST_NEIGHBOR = "nearest_neighbour" + NN = NEAREST_NEIGHBOR + LINEAR = "linear" + CUBIC_SPLINE = "cubic_spline" + + +class Interpolator(object): + """ + Implements non-integer accesses on fields using linear interpolation. + + On GPU, this interpolator can be implemented by a :class:`.TextureCachedField` for hardware acceleration. + + Address modes are different boundary handlings possible choices are like for CUDA textures + + **CLAMP** + + The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M. + + **BORDER** + + The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0and for k >= M. + + Now, to describe the last two address modes, we are forced to consider normalized coordinates, + so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1. + + **WRAP** + + The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M. + In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p. + + **MIRROR** + + The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2. + In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0. + + Explanations from https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures + """ + + required_global_declarations = [] + + def __init__(self, + parent_field, + interpolation_mode: InterpolationMode, + address_mode='BORDER', + use_normalized_coordinates=False, + allow_textures=True): + super().__init__() + + self.field = parent_field.new_field_with_different_name(parent_field.name) + self.field.field_type = pystencils.field.FieldType.CUSTOM + self.address_mode = address_mode + self.use_normalized_coordinates = use_normalized_coordinates + hash_str = "%x" % abs(hash(self.field) + hash(address_mode)) + self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str, + 'dummy_symbol_carrying_field' + self.field.name + hash_str) + self.symbol.field = self.field + self.symbol.interpolator = self + self.allow_textures = allow_textures + self.interpolation_mode = interpolation_mode + + def at(self, offset): + return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset]) + + def __getitem__(self, offset): + return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset]) + + def __str__(self): + return '%s_interpolator' % self.field.name + + def __repr__(self): + return self.__str__() + + def __hash__(self): + return hash((str(self.address_mode), + str(type(self)), + self.symbol, + self.address_mode, + self.use_normalized_coordinates)) + + +class LinearInterpolator(Interpolator): + + def __init__(self, + parent_field: pystencils.Field, + address_mode='BORDER', + use_normalized_coordinates=False): + super().__init__(parent_field, + InterpolationMode.LINEAR, + address_mode, + use_normalized_coordinates) + + +class NearestNeightborInterpolator(Interpolator): + + def __init__(self, + parent_field: pystencils.Field, + address_mode='BORDER', + use_normalized_coordinates=False): + super().__init__(parent_field, + InterpolationMode.NN, + address_mode, + use_normalized_coordinates) + + +class InterpolatorAccess(TypedSymbol): + def __new__(cls, field, offsets, *args, **kwargs): + obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs) + return obj + + def __new_stage2__(self, symbol, *offsets): + assert offsets is not None + obj = super().__xnew__(self, '%s_interpolator_%x' % + (symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype) + obj.offsets = offsets + obj.symbol = symbol + obj.field = symbol.field + obj.interpolator = symbol.interpolator + return obj + + def __hash__(self): + return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator)) + + def __str__(self): + return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets)) + + def __repr__(self): + return self.__str__() + + def atoms(self, *types): + if self.offsets: + offsets = set(o for o in self.offsets if isinstance(o, types)) + if isinstance(self, *types): + offsets.update([self]) + for o in self.offsets: + if hasattr(o, 'atoms'): + offsets.update(set(o.atoms(types))) + return offsets + else: + return set() + + @property + def free_symbols(self): + symbols = set() + if self.offsets is not None: + for o in self.offsets: + if hasattr(o, 'free_symbols'): + symbols.update(set(o.free_symbols)) + # if hasattr(o, 'atoms'): + # symbols.update(set(o.atoms(sp.Symbol))) + + return symbols + + @property + def args(self): + return [self.symbol, *self.offsets] + + @property + def symbols_defined(self) -> Set[sp.Symbol]: + return {self} + + @property + def interpolation_mode(self): + return self.interpolator.interpolation_mode + + def implementation_with_stencils(self): + field = self.field + + default_int_type = create_type('int64') + use_textures = isinstance(self, TextureAccess) + if use_textures: + def absolute_access(x, _): + return self.texture.at((o for o in x)) + else: + absolute_access = field.absolute_access + + sum = [0, ] * (field.shape[0] if field.index_dimensions else 1) + + offsets = self.offsets + rounding_functions = (sp.floor, lambda x: sp.floor(x) + 1) + + for channel_idx in range(field.shape[0] if field.index_dimensions else 1): + if self.interpolation_mode == InterpolationMode.NN: + if use_textures: + sum[channel_idx] = self + else: + sum[channel_idx] = absolute_access([sp.floor(i + 0.5) for i in offsets], channel_idx) + + elif self.interpolation_mode == InterpolationMode.LINEAR: + # TODO optimization: implement via lerp: https://devblogs.nvidia.com/lerp-faster-cuda/ + for c in itertools.product(rounding_functions, repeat=field.spatial_dimensions): + weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)]) + index = [f(offset) for (f, offset) in zip(c, offsets)] + # Hardware boundary handling on GPU + if use_textures: + weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)]) + sum[channel_idx] += \ + weight * absolute_access(index, channel_idx if field.index_dimensions else ()) + # else boundary handling using software + elif str(self.interpolator.address_mode).lower() == 'border': + is_inside_field = sp.And( + *itertools.chain([i >= 0 for i in index], + [idx < field.shape[dim] for (dim, idx) in enumerate(index)])) + index = [cast_func(i, default_int_type) for i in index] + sum[channel_idx] += sp.Piecewise( + (weight * absolute_access(index, channel_idx if field.index_dimensions else ()), + is_inside_field), + (sp.simplify(0), True) + ) + elif str(self.interpolator.address_mode).lower() == 'clamp': + index = [sp.Min(sp.Max(0, cast_func(i, default_int_type)), field.spatial_shape[dim] - 1) + for (dim, i) in enumerate(index)] + sum[channel_idx] += weight * \ + absolute_access(index, channel_idx if field.index_dimensions else ()) + elif str(self.interpolator.address_mode).lower() == 'wrap': + index = [sp.Mod(cast_func(i, default_int_type), field.shape[dim] - 1) + for (dim, i) in enumerate(index)] + index = [cast_func(sp.Piecewise((i, i > 0), + (sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)), + True)), default_int_type) + for (dim, i) in enumerate(index)] + sum[channel_idx] += weight * \ + absolute_access(index, channel_idx if field.index_dimensions else ()) + # sum[channel_idx] = 0 + elif str(self.interpolator.address_mode).lower() == 'mirror': + def triangle_fun(x, half_period): + saw_tooth = sp.Abs(cast_func(x, default_int_type)) % ( + cast_func(2 * half_period, create_type('int32'))) + return sp.Piecewise((saw_tooth, saw_tooth < half_period), + (2 * half_period - 1 - saw_tooth, True)) + index = [cast_func(triangle_fun(i, field.shape[dim]), + default_int_type) for (dim, i) in enumerate(index)] + sum[channel_idx] += weight * \ + absolute_access(index, channel_idx if field.index_dimensions else ()) + else: + raise NotImplementedError() + elif self.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + raise NotImplementedError("only works with HW interpolation for float32") + + sum = [sp.factor(s) for s in sum] + + if field.index_dimensions: + return sp.Matrix(sum) + else: + return sum[0] + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + +########################################################################################## +# GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols # +########################################################################################## + + +class TextureCachedField: + + def __init__(self, parent_field, + address_mode=None, + filter_mode=None, + interpolation_mode: InterpolationMode = InterpolationMode.LINEAR, + use_normalized_coordinates=False, + read_as_integer=False + ): + if isinstance(address_mode, str): + address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) + + if address_mode is None: + address_mode = pycuda.driver.address_mode.BORDER + if filter_mode is None: + filter_mode = pycuda.driver.filter_mode.LINEAR + + # self, field_name, field_type, dtype, layout, shape, strides + self.field = parent_field + self.address_mode = address_mode + self.filter_mode = filter_mode + self.read_as_integer = read_as_integer + self.use_normalized_coordinates = use_normalized_coordinates + self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype) + self.symbol.interpolator = self + self.symbol.field = self.field + self.required_global_declarations = [TextureDeclaration(self)] + self.interpolation_mode = interpolation_mode + + # assert str(self.field.dtype) != 'double', "CUDA does not support double textures!" + # assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less" + + @classmethod + def from_interpolator(cls, interpolator: LinearInterpolator): + if hasattr(interpolator, 'allow_textures') and not interpolator.allow_textures: + return interpolator + obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode) + return obj + + def at(self, offset): + return TextureAccess(self.symbol, *offset) + + def __getitem__(self, offset): + return TextureAccess(self.symbol, *offset) + + def __str__(self): + return '%s_texture_%x' % (self.field.name, abs(hash(self.field) + hash(str(self.address_mode)))) + + def __repr__(self): + return self.__str__() + + +class TextureAccess(InterpolatorAccess): + def __new__(cls, texture_symbol, offsets, *args, **kwargs): + obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs) + return obj + + def __new_stage2__(self, symbol, *offsets): + obj = super().__xnew__(self, symbol, *offsets) + obj.required_global_declarations = symbol.interpolator.required_global_declarations + obj.required_global_declarations[0]._symbols_defined.add(obj) + return obj + + def __str__(self): + return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets)) + + @property + def texture(self): + return self.interpolator + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + + +class TextureDeclaration(Node): + """ + A global declaration of a texture. Visible both for device and host code. + + .. code:: cpp + + // This Node represents the following global declaration + texture<float, cudaTextureType2D, cudaReadModeElementType> x_texture_5acc9fced7b0dc3e; + + __device__ kernel(...) { + // kernel acceses x_texture_5acc9fced7b0dc3e with tex2d(...) + } + + __host__ launch_kernel(...) { + // Host needs to bind the texture + cudaBindTexture(0, x_texture_5acc9fced7b0dc3e, buffer, N*sizeof(float)); + } + + This has been deprecated by CUDA in favor of :class:`.TextureObject`. + But texture objects are not yet supported by PyCUDA (https://github.com/inducer/pycuda/pull/174) + """ + + def __init__(self, parent_texture): + self.texture = parent_texture + self._symbols_defined = {self.texture.symbol} + + @property + def symbols_defined(self) -> Set[sp.Symbol]: + return self._symbols_defined + + @property + def args(self) -> Set[sp.Symbol]: + return set() + + @property + def headers(self): + return ['"pycuda-helpers.hpp"'] + + +class TextureObject(TextureDeclaration): + """ + A CUDA texture object. Opposed to :class:`.TextureDeclaration` it is not declared globally but + used as a function argument for the kernel call. + + Like :class:`.TextureDeclaration` it defines :class:`.TextureAccess` symbols. + Just the printing representation is a bit different. + """ + pass + + +def dtype_supports_textures(dtype): + """ + Returns whether CUDA natively supports texture fetches with this numpy dtype. + + The maximum word size for a texture fetch is four bytes. + + With this trick also larger dtypes can be fetched: + https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp + + """ + if hasattr(dtype, 'numpy_dtype'): + dtype = dtype.numpy_dtype + + if isinstance(dtype, type): + return dtype().itemsize <= 4 + + return dtype.itemsize <= 4 diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py index ade980f55969a4005f2c5055ad27f861ab5524de..866f1a4ef5357e6a1a74d21b174d24785c398677 100644 --- a/pystencils/kernelcreation.py +++ b/pystencils/kernelcreation.py @@ -12,10 +12,18 @@ from pystencils.transformations import ( loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel) -def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None, +def create_kernel(assignments, + target='cpu', + data_type="double", + iteration_slice=None, + ghost_layers=None, skip_independence_check=False, - cpu_openmp=False, cpu_vectorize_info=None, cpu_blocking=None, - gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): + cpu_openmp=False, + cpu_vectorize_info=None, + cpu_blocking=None, + gpu_indexing='block', + gpu_indexing_params=MappingProxyType({}), + use_textures_for_interpolation=True): """ Creates abstract syntax tree (AST) of kernel, using a list of update equations. @@ -99,14 +107,22 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice ast = create_cuda_kernel(assignments, type_info=data_type, indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params), iteration_slice=iteration_slice, ghost_layers=ghost_layers, - skip_independence_check=skip_independence_check) + skip_independence_check=skip_independence_check, + use_textures_for_interpolation=use_textures_for_interpolation) return ast else: raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,)) -def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'), - cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): +def create_indexed_kernel(assignments, + index_fields, + target='cpu', + data_type="double", + coordinate_names=('x', 'y', 'z'), + cpu_openmp=True, + gpu_indexing='block', + gpu_indexing_params=MappingProxyType({}), + use_textures_for_interpolation=True): """ Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. @@ -159,8 +175,12 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do elif target == 'gpu': from pystencils.gpucuda import created_indexed_cuda_kernel idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params) - ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type, - coordinate_names=coordinate_names, indexing_creator=idx_creator) + ast = created_indexed_cuda_kernel(assignments, + index_fields, + type_info=data_type, + coordinate_names=coordinate_names, + indexing_creator=idx_creator, + use_textures_for_interpolation=use_textures_for_interpolation) return ast else: raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) diff --git a/pystencils/spatial_coordinates.py b/pystencils/spatial_coordinates.py new file mode 100644 index 0000000000000000000000000000000000000000..6c3ba4db7d98b50bce9442619fbbce07782004a1 --- /dev/null +++ b/pystencils/spatial_coordinates.py @@ -0,0 +1,18 @@ + +import sympy + +import pystencils +import pystencils.astnodes + +x_, y_, z_ = tuple(pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(3)) +x_staggered, y_staggered, z_staggered = x_ + 0.5, y_ + 0.5, z_ + 0.5 + + +def x_vector(ndim): + return sympy.Matrix(tuple(pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(ndim))) + + +def x_staggered_vector(ndim): + return sympy.Matrix(tuple( + pystencils.astnodes.LoopOverCoordinate.get_loop_counter_symbol(i) + 0.5 for i in range(ndim) + )) diff --git a/pystencils/transformations.py b/pystencils/transformations.py index 554b9cf9b2257e6b0ef97b24239ea210baa8b63b..1bfb0511ac9a6a8afe11b8275e4ec8d8d75cb9f6 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -10,6 +10,7 @@ import sympy as sp from sympy.logic.boolalg import Boolean import pystencils.astnodes as ast +import pystencils.integer_functions from pystencils.assignment import Assignment from pystencils.data_types import ( PointerType, StructType, TypedSymbol, cast_func, collate_types, create_type, get_base_type, @@ -147,10 +148,7 @@ def get_common_shape(field_set): return shape -def make_loop_over_domain(body, - iteration_slice=None, - ghost_layers=None, - loop_order=None): +def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None): """Uses :class:`pystencils.field.Field.Access` to create (multiple) loops around given AST. Args: @@ -192,21 +190,17 @@ def make_loop_over_domain(body, if iteration_slice is None: begin = ghost_layers[loop_coordinate][0] end = shape[loop_coordinate] - ghost_layers[loop_coordinate][1] - new_loop = ast.LoopOverCoordinate(current_body, loop_coordinate, - begin, end, 1) + new_loop = ast.LoopOverCoordinate(current_body, loop_coordinate, begin, end, 1) current_body = ast.Block([new_loop]) else: slice_component = iteration_slice[loop_coordinate] if type(slice_component) is slice: sc = slice_component - new_loop = ast.LoopOverCoordinate(current_body, - loop_coordinate, sc.start, - sc.stop, sc.step) + new_loop = ast.LoopOverCoordinate(current_body, loop_coordinate, sc.start, sc.stop, sc.step) current_body = ast.Block([new_loop]) else: - assignment = ast.SympyAssignment( - ast.LoopOverCoordinate.get_loop_counter_symbol( - loop_coordinate), sp.sympify(slice_component)) + assignment = ast.SympyAssignment(ast.LoopOverCoordinate.get_loop_counter_symbol(loop_coordinate), + sp.sympify(slice_component)) current_body.insert_front(assignment) return current_body, ghost_layers @@ -245,11 +239,9 @@ def create_intermediate_base_pointer(field_access, coordinates, previous_ptr): offset += field.strides[coordinate_id] * coordinate_value if coordinate_id < field.spatial_dimensions: - offset += field.strides[coordinate_id] * field_access.offsets[ - coordinate_id] + offset += field.strides[coordinate_id] * field_access.offsets[coordinate_id] if type(field_access.offsets[coordinate_id]) is int: - name += "_%d%d" % (coordinate_id, - field_access.offsets[coordinate_id]) + name += "_%d%d" % (coordinate_id, field_access.offsets[coordinate_id]) else: list_to_hash.append(field_access.offsets[coordinate_id]) else: @@ -266,8 +258,7 @@ def create_intermediate_base_pointer(field_access, coordinates, previous_ptr): return new_ptr, offset -def parse_base_pointer_info(base_pointer_specification, loop_order, - spatial_dimensions, index_dimensions): +def parse_base_pointer_info(base_pointer_specification, loop_order, spatial_dimensions, index_dimensions): """ Creates base pointer specification for :func:`resolve_field_accesses` function. @@ -305,11 +296,10 @@ def parse_base_pointer_info(base_pointer_specification, loop_order, def add_new_element(elem): if elem >= spatial_dimensions + index_dimensions: - raise ValueError("Coordinate %d does not exist" % (elem, )) + raise ValueError("Coordinate %d does not exist" % (elem,)) new_group.append(elem) if elem in specified_coordinates: - raise ValueError("Coordinate %d specified two times" % - (elem, )) + raise ValueError("Coordinate %d specified two times" % (elem,)) specified_coordinates.add(elem) for element in spec_group: @@ -332,7 +322,7 @@ def parse_base_pointer_info(base_pointer_specification, loop_order, index = int(element[len("index"):]) add_new_element(spatial_dimensions + index) else: - raise ValueError("Unknown specification %s" % (element, )) + raise ValueError("Unknown specification %s" % (element,)) result.append(new_group) @@ -357,42 +347,30 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None): base buffer index - required by 'resolve_buffer_accesses' function """ if loop_counters is None or loop_iterations is None: - loops = [ - l for l in filtered_tree_iteration( - ast_node, ast.LoopOverCoordinate, ast.SympyAssignment) - ] + loops = [l for l in filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, ast.SympyAssignment)] loops.reverse() - parents_of_innermost_loop = list( - parents_of_type(loops[0], - ast.LoopOverCoordinate, - include_current=True)) + parents_of_innermost_loop = list(parents_of_type(loops[0], ast.LoopOverCoordinate, include_current=True)) assert len(loops) == len(parents_of_innermost_loop) - assert all(l1 is l2 - for l1, l2 in zip(loops, parents_of_innermost_loop)) + assert all(l1 is l2 for l1, l2 in zip(loops, parents_of_innermost_loop)) loop_iterations = [(l.stop - l.start) / l.step for l in loops] loop_counters = [l.loop_counter_symbol for l in loops] field_accesses = ast_node.atoms(AbstractField.AbstractAccess) - buffer_accesses = { - fa - for fa in field_accesses if FieldType.is_buffer(fa.field) - } + buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)} loop_counters = [v * len(buffer_accesses) for v in loop_counters] base_buffer_index = loop_counters[0] stride = 1 for idx, var in enumerate(loop_counters[1:]): cur_stride = loop_iterations[idx] - stride *= int(cur_stride) if isinstance(cur_stride, - float) else cur_stride + stride *= int(cur_stride) if isinstance(cur_stride, float) else cur_stride base_buffer_index += var * stride return base_buffer_index -def resolve_buffer_accesses(ast_node, - base_buffer_index, - read_only_field_names=set()): +def resolve_buffer_accesses(ast_node, base_buffer_index, read_only_field_names=set()): + def visit_sympy_expr(expr, enclosing_block, sympy_assignment): if isinstance(expr, AbstractField.AbstractAccess): field_access = expr @@ -402,24 +380,17 @@ def resolve_buffer_accesses(ast_node, return expr buffer = field_access.field - field_ptr = FieldPointerSymbol( - buffer.name, - buffer.dtype, - const=buffer.name in read_only_field_names) + field_ptr = FieldPointerSymbol(buffer.name, buffer.dtype, const=buffer.name in read_only_field_names) buffer_index = base_buffer_index if len(field_access.index) > 1: - raise RuntimeError( - 'Only indexing dimensions up to 1 are currently supported in buffers!' - ) + raise RuntimeError('Only indexing dimensions up to 1 are currently supported in buffers!') if len(field_access.index) > 0: cell_index = field_access.index[0] buffer_index += cell_index - result = ast.ResolvedFieldAccess(field_ptr, buffer_index, - field_access.field, - field_access.offsets, + result = ast.ResolvedFieldAccess(field_ptr, buffer_index, field_access.field, field_access.offsets, field_access.index) return visit_sympy_expr(result, enclosing_block, sympy_assignment) @@ -427,23 +398,16 @@ def resolve_buffer_accesses(ast_node, if isinstance(expr, ast.ResolvedFieldAccess): return expr - new_args = [ - visit_sympy_expr(e, enclosing_block, sympy_assignment) - for e in expr.args - ] - kwargs = { - 'evaluate': False - } if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} + new_args = [visit_sympy_expr(e, enclosing_block, sympy_assignment) for e in expr.args] + kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} return expr.func(*new_args, **kwargs) if new_args else expr def visit_node(sub_ast): if isinstance(sub_ast, ast.SympyAssignment): enclosing_block = sub_ast.parent assert type(enclosing_block) is ast.Block - sub_ast.lhs = visit_sympy_expr(sub_ast.lhs, enclosing_block, - sub_ast) - sub_ast.rhs = visit_sympy_expr(sub_ast.rhs, enclosing_block, - sub_ast) + sub_ast.lhs = visit_sympy_expr(sub_ast.lhs, enclosing_block, sub_ast) + sub_ast.rhs = visit_sympy_expr(sub_ast.rhs, enclosing_block, sub_ast) else: for i, a in enumerate(sub_ast.args): visit_node(a) @@ -451,8 +415,7 @@ def resolve_buffer_accesses(ast_node, return visit_node(ast_node) -def resolve_field_accesses(ast_node, - read_only_field_names=set(), +def resolve_field_accesses(ast_node, read_only_field_names=set(), field_to_base_pointer_info=MappingProxyType({}), field_to_fixed_coordinates=MappingProxyType({})): """ @@ -469,10 +432,8 @@ def resolve_field_accesses(ast_node, Returns transformed AST """ - field_to_base_pointer_info = OrderedDict( - sorted(field_to_base_pointer_info.items(), key=lambda pair: pair[0])) - field_to_fixed_coordinates = OrderedDict( - sorted(field_to_fixed_coordinates.items(), key=lambda pair: pair[0])) + field_to_base_pointer_info = OrderedDict(sorted(field_to_base_pointer_info.items(), key=lambda pair: pair[0])) + field_to_fixed_coordinates = OrderedDict(sorted(field_to_fixed_coordinates.items(), key=lambda pair: pair[0])) def visit_sympy_expr(expr, enclosing_block, sympy_assignment): if isinstance(expr, AbstractField.AbstractAccess): @@ -480,16 +441,13 @@ def resolve_field_accesses(ast_node, field = field_access.field if field_access.indirect_addressing_fields: - new_offsets = tuple( - visit_sympy_expr(off, enclosing_block, sympy_assignment) - for off in field_access.offsets) - new_indices = tuple( - visit_sympy_expr(ind, enclosing_block, sympy_assignment - ) if isinstance(ind, sp.Basic) else ind - for ind in field_access.index) + new_offsets = tuple(visit_sympy_expr(off, enclosing_block, sympy_assignment) + for off in field_access.offsets) + new_indices = tuple(visit_sympy_expr(ind, enclosing_block, sympy_assignment) + if isinstance(ind, sp.Basic) else ind + for ind in field_access.index) field_access = Field.Access(field_access.field, new_offsets, - new_indices, - field_access.is_absolute_access) + new_indices, field_access.is_absolute_access) if field.name in field_to_base_pointer_info: base_pointer_info = field_to_base_pointer_info[field.name] @@ -510,15 +468,12 @@ def resolve_field_accesses(ast_node, if e < field.spatial_dimensions: if field.name in field_to_fixed_coordinates: if not field_access.is_absolute_access: - coordinates[e] = field_to_fixed_coordinates[ - field.name][e] + coordinates[e] = field_to_fixed_coordinates[field.name][e] else: coordinates[e] = 0 else: if not field_access.is_absolute_access: - coordinates[ - e] = ast.LoopOverCoordinate.get_loop_counter_symbol( - e) + coordinates[e] = ast.LoopOverCoordinate.get_loop_counter_symbol(e) else: coordinates[e] = 0 coordinates[e] *= field.dtype.item_size @@ -527,11 +482,9 @@ def resolve_field_accesses(ast_node, assert field.index_dimensions == 1 accessed_field_name = field_access.index[0] assert isinstance(accessed_field_name, str) - coordinates[e] = field.dtype.get_element_offset( - accessed_field_name) + coordinates[e] = field.dtype.get_element_offset(accessed_field_name) else: - coordinates[e] = field_access.index[ - e - field.spatial_dimensions] + coordinates[e] = field_access.index[e - field.spatial_dimensions] return coordinates @@ -539,27 +492,19 @@ def resolve_field_accesses(ast_node, for group in reversed(base_pointer_info[1:]): coord_dict = create_coordinate_dict(group) - new_ptr, offset = create_intermediate_base_pointer( - field_access, coord_dict, last_pointer) + new_ptr, offset = create_intermediate_base_pointer(field_access, coord_dict, last_pointer) if new_ptr not in enclosing_block.symbols_defined: - new_assignment = ast.SympyAssignment(new_ptr, - last_pointer + offset, - is_const=False) - enclosing_block.insert_before(new_assignment, - sympy_assignment) + new_assignment = ast.SympyAssignment(new_ptr, last_pointer + offset, is_const=False) + enclosing_block.insert_before(new_assignment, sympy_assignment) last_pointer = new_ptr coord_dict = create_coordinate_dict(base_pointer_info[0]) - _, offset = create_intermediate_base_pointer( - field_access, coord_dict, last_pointer) - result = ast.ResolvedFieldAccess(last_pointer, offset, - field_access.field, - field_access.offsets, - field_access.index) + _, offset = create_intermediate_base_pointer(field_access, coord_dict, last_pointer) + result = ast.ResolvedFieldAccess(last_pointer, offset, field_access.field, + field_access.offsets, field_access.index) if isinstance(get_base_type(field_access.field.dtype), StructType): - new_type = field_access.field.dtype.get_element_type( - field_access.index[0]) + new_type = field_access.field.dtype.get_element_type(field_access.index[0]) result = reinterpret_cast_func(result, new_type) return visit_sympy_expr(result, enclosing_block, sympy_assignment) @@ -567,33 +512,30 @@ def resolve_field_accesses(ast_node, if isinstance(expr, ast.ResolvedFieldAccess): return expr - new_args = [ - visit_sympy_expr(e, enclosing_block, sympy_assignment) - for e in expr.args - ] - kwargs = { - 'evaluate': False - } if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} + if hasattr(expr, 'args'): + new_args = [visit_sympy_expr(e, enclosing_block, sympy_assignment) for e in expr.args] + else: + new_args = [] + kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} return expr.func(*new_args, **kwargs) if new_args else expr def visit_node(sub_ast): if isinstance(sub_ast, ast.SympyAssignment): enclosing_block = sub_ast.parent assert type(enclosing_block) is ast.Block - sub_ast.lhs = visit_sympy_expr(sub_ast.lhs, enclosing_block, - sub_ast) - sub_ast.rhs = visit_sympy_expr(sub_ast.rhs, enclosing_block, - sub_ast) + sub_ast.lhs = visit_sympy_expr(sub_ast.lhs, enclosing_block, sub_ast) + sub_ast.rhs = visit_sympy_expr(sub_ast.rhs, enclosing_block, sub_ast) elif isinstance(sub_ast, ast.Conditional): enclosing_block = sub_ast.parent assert type(enclosing_block) is ast.Block - sub_ast.condition_expr = visit_sympy_expr(sub_ast.condition_expr, - enclosing_block, sub_ast) + sub_ast.condition_expr = visit_sympy_expr(sub_ast.condition_expr, enclosing_block, sub_ast) visit_node(sub_ast.true_block) if sub_ast.false_block: visit_node(sub_ast.false_block) else: - for i, a in enumerate(sub_ast.args): + if isinstance(sub_ast, (bool, int, float)): + return + for a in sub_ast.args: visit_node(a) return visit_node(ast_node) @@ -632,14 +574,11 @@ def move_constants_before_loop(ast_node): element = element.parent return last_block, last_block_child - def check_if_assignment_already_in_block(assignment, - target_block, - rhs_or_lhs=True): + def check_if_assignment_already_in_block(assignment, target_block, rhs_or_lhs=True): for arg in target_block.args: if type(arg) is not ast.SympyAssignment: continue - if (rhs_or_lhs and arg.rhs == assignment.rhs) or ( - not rhs_or_lhs and arg.lhs == assignment.lhs): + if (rhs_or_lhs and arg.rhs == assignment.rhs) or (not rhs_or_lhs and arg.lhs == assignment.lhs): return arg return None @@ -662,24 +601,21 @@ def move_constants_before_loop(ast_node): # Before traversing the next child, all symbols are substituted first. child.subs(substitute_variables) - if not isinstance( - child, ast.SympyAssignment): # only move SympyAssignments + if not isinstance(child, ast.SympyAssignment): # only move SympyAssignments block.append(child) continue target, child_to_insert_before = find_block_to_move_to(child) - if target == block: # movement not possible + if target == block: # movement not possible target.append(child) else: if isinstance(child, ast.SympyAssignment): - exists_already = check_if_assignment_already_in_block( - child, target, False) + exists_already = check_if_assignment_already_in_block(child, target, False) else: exists_already = False if not exists_already: - rhs_identical = check_if_assignment_already_in_block( - child, target, True) + rhs_identical = check_if_assignment_already_in_block(child, target, True) if rhs_identical: # there is already an assignment out there with the same rhs # -> replace all lhs symbols in this block with the lhs of the outer assignment @@ -694,9 +630,7 @@ def move_constants_before_loop(ast_node): # -> symbol has to be renamed assert isinstance(child.lhs, TypedSymbol) new_symbol = TypedSymbol(sp.Dummy().name, child.lhs.dtype) - target.insert_before( - ast.SympyAssignment(new_symbol, child.rhs), - child_to_insert_before) + target.insert_before(ast.SympyAssignment(new_symbol, child.rhs), child_to_insert_before) substitute_variables[child.lhs] = new_symbol @@ -712,9 +646,7 @@ def split_inner_loop(ast_node: ast.Node, symbol_groups): """ all_loops = ast_node.atoms(ast.LoopOverCoordinate) inner_loop = [l for l in all_loops if l.is_innermost_loop] - assert len( - inner_loop - ) == 1, "Error in AST: multiple innermost loops. Was split transformation already called?" + assert len(inner_loop) == 1, "Error in AST: multiple innermost loops. Was split transformation already called?" inner_loop = inner_loop[0] assert type(inner_loop.body) is ast.Block outer_loop = [l for l in all_loops if l.is_outermost_loop] @@ -772,11 +704,8 @@ def split_inner_loop(ast_node: ast.Node, symbol_groups): inner_loop.parent.replace(inner_loop, ast.Block(new_loops)) for tmp_array in symbols_with_temporary_array: - tmp_array_pointer = TypedSymbol(tmp_array.name, - PointerType(tmp_array.dtype)) - alloc_node = ast.TemporaryMemoryAllocation(tmp_array_pointer, - inner_loop.stop, - inner_loop.start) + tmp_array_pointer = TypedSymbol(tmp_array.name, PointerType(tmp_array.dtype)) + alloc_node = ast.TemporaryMemoryAllocation(tmp_array_pointer, inner_loop.stop, inner_loop.start) free_node = ast.TemporaryMemoryFree(alloc_node) outer_loop.parent.insert_front(alloc_node) outer_loop.parent.append(free_node) @@ -815,8 +744,7 @@ def cut_loop(loop_node, cutting_points): return new_loops -def simplify_conditionals(node: ast.Node, - loop_counter_simplification: bool = False) -> None: +def simplify_conditionals(node: ast.Node, loop_counter_simplification: bool = False) -> None: """Removes conditionals that are always true/false. Args: @@ -832,18 +760,14 @@ def simplify_conditionals(node: ast.Node, if conditional.condition_expr == sp.true: conditional.parent.replace(conditional, [conditional.true_block]) elif conditional.condition_expr == sp.false: - conditional.parent.replace( - conditional, - [conditional.false_block] if conditional.false_block else []) + conditional.parent.replace(conditional, [conditional.false_block] if conditional.false_block else []) elif loop_counter_simplification: try: # noinspection PyUnresolvedReferences from pystencils.integer_set_analysis import simplify_loop_counter_dependent_conditional simplify_loop_counter_dependent_conditional(conditional) except ImportError: - warnings.warn( - "Integer simplifications in conditionals skipped, because ISLpy package not installed" - ) + warnings.warn("Integer simplifications in conditionals skipped, because ISLpy package not installed") def cleanup_blocks(node: ast.Node) -> None: @@ -891,11 +815,18 @@ class KernelConstraintsCheck: return ast.SympyAssignment(new_lhs, new_rhs) def process_expression(self, rhs, type_constants=True): + from pystencils.interpolation_astnodes import InterpolatorAccess + self._update_accesses_rhs(rhs) if isinstance(rhs, AbstractField.AbstractAccess): self.fields_read.add(rhs.field) self.fields_read.update(rhs.indirect_addressing_fields) return rhs + elif isinstance(rhs, InterpolatorAccess): + new_args = [self.process_expression(arg, type_constants) for arg in rhs.offsets] + if new_args: + rhs.offsets = new_args + return rhs elif isinstance(rhs, TypedSymbol): return rhs elif isinstance(rhs, sp.Symbol): @@ -904,6 +835,20 @@ class KernelConstraintsCheck: return cast_func(rhs, create_type(rhs.dtype)) elif type_constants and isinstance(rhs, sp.Number): return cast_func(rhs, create_type(self._type_for_symbol['_constant'])) + # Very important that this clause comes before BooleanFunction + elif isinstance(rhs, cast_func): + return cast_func( + self.process_expression(rhs.args[0], type_constants=False), + rhs.dtype) + elif isinstance(rhs, sp.boolalg.BooleanFunction) or \ + type(rhs) in pystencils.integer_functions.__dict__.values(): + new_args = [self.process_expression(a, type_constants) for a in rhs.args] + types_of_expressions = [get_type_of_expression(a) for a in new_args] + arg_type = collate_types(types_of_expressions, forbid_collation_to_float=True) + new_args = [a if not hasattr(a, 'dtype') or a.dtype == arg_type + else cast_func(a, arg_type) + for a in new_args] + return rhs.func(*new_args) elif isinstance(rhs, sp.Mul): new_args = [ self.process_expression(arg, type_constants) @@ -912,10 +857,6 @@ class KernelConstraintsCheck: return rhs.func(*new_args) if new_args else rhs elif isinstance(rhs, sp.Indexed): return rhs - elif isinstance(rhs, cast_func): - return cast_func( - self.process_expression(rhs.args[0], type_constants=False), - rhs.dtype) else: if isinstance(rhs, sp.Pow): # don't process exponents -> they should remain integers @@ -961,17 +902,14 @@ class KernelConstraintsCheck: self.scopes.define_symbol(lhs) def _update_accesses_rhs(self, rhs): - if isinstance(rhs, AbstractField.AbstractAccess - ) and self.check_independence_condition: + if isinstance(rhs, AbstractField.AbstractAccess) and self.check_independence_condition: writes = self._field_writes[self.FieldAndIndex( rhs.field, rhs.index)] for write_offset in writes: assert len(writes) == 1 if write_offset != rhs.offsets: - raise ValueError( - "Violation of loop independence condition. Field " - "{} is read at {} and written at {}".format( - rhs.field, rhs.offsets, write_offset)) + raise ValueError("Violation of loop independence condition. Field " + "{} is read at {} and written at {}".format(rhs.field, rhs.offsets, write_offset)) self.fields_read.add(rhs.field) elif isinstance(rhs, sp.Symbol): self.scopes.access_symbol(rhs) @@ -992,15 +930,10 @@ def add_types(eqs, type_for_symbol, check_independence_condition): ``fields_read, fields_written, typed_equations`` set of read fields, set of written fields, list of equations where symbols have been replaced by typed symbols """ - if isinstance(type_for_symbol, - str) or not hasattr(type_for_symbol, '__getitem__'): + if isinstance(type_for_symbol, str) or not hasattr(type_for_symbol, '__getitem__'): type_for_symbol = typing_from_sympy_inspection(eqs, type_for_symbol) - # assignments = ast.Block(eqs).atoms(ast.Assignment) - # type_for_symbol.update( {a.lhs: get_type_of_expression(a.rhs) for a in assignments}) - # print(type_for_symbol) - check = KernelConstraintsCheck(type_for_symbol, - check_independence_condition) + check = KernelConstraintsCheck(type_for_symbol, check_independence_condition) def visit(obj): if isinstance(obj, (list, tuple)): @@ -1022,8 +955,7 @@ def add_types(eqs, type_for_symbol, check_independence_condition): result = ast.Block([visit(e) for e in obj.args]) check.scopes.pop() return result - elif isinstance( - obj, ast.Node) and not isinstance(obj, ast.LoopOverCoordinate): + elif isinstance(obj, ast.Node) and not isinstance(obj, ast.LoopOverCoordinate): return obj else: raise ValueError("Invalid object in kernel " + str(type(obj))) @@ -1082,12 +1014,12 @@ def insert_casts(node): for arg in node.args: args.append(insert_casts(arg)) # TODO indexed, LoopOverCoordinate - if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, - sp.Lt, sp.Le, sp.Gt, sp.Ge): + if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, sp.Lt, sp.Le, sp.Gt, sp.Ge): # TODO optimize pow, don't cast integer on double types = [get_type_of_expression(arg) for arg in args] assert len(types) > 0 - target = collate_types(types) + # Never ever, ever collate to float type for boolean functions! + target = collate_types(types, forbid_collation_to_float=isinstance(node.func, sp.boolalg.BooleanFunction)) zipped = list(zip(args, types)) if target.func is PointerType: assert node.func is sp.Add @@ -1101,8 +1033,7 @@ def insert_casts(node): if target.func is PointerType: return node.func(*args) # TODO fix, not complete else: - return node.func( - lhs, *cast([(rhs, get_type_of_expression(rhs))], target)) + return node.func(lhs, *cast([(rhs, get_type_of_expression(rhs))], target)) elif node.func is ast.ResolvedFieldAccess: return node elif node.func is ast.Block: @@ -1127,22 +1058,14 @@ def insert_casts(node): return node.func(*args) -def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction - ) -> None: +def remove_conditionals_in_staggered_kernel(function_node: ast.KernelFunction) -> None: """Removes conditionals of a kernel that iterates over staggered positions by splitting the loops at last element""" - all_inner_loops = [ - l for l in function_node.atoms(ast.LoopOverCoordinate) - if l.is_innermost_loop - ] - assert len( - all_inner_loops - ) == 1, "Transformation works only on kernels with exactly one inner loop" + all_inner_loops = [l for l in function_node.atoms(ast.LoopOverCoordinate) if l.is_innermost_loop] + assert len(all_inner_loops) == 1, "Transformation works only on kernels with exactly one inner loop" inner_loop = all_inner_loops.pop() - for loop in parents_of_type(inner_loop, - ast.LoopOverCoordinate, - include_current=True): + for loop in parents_of_type(inner_loop, ast.LoopOverCoordinate, include_current=True): cut_loop(loop, [loop.stop - 1]) simplify_conditionals(function_node.body, loop_counter_simplification=True) @@ -1363,3 +1286,57 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int: inner_loop.start = block_ctr inner_loop.stop = stop return len(coordinates) + + +def implement_interpolations(ast_node: ast.Node, + implement_by_texture_accesses: bool = False, + vectorize: bool = False, + use_hardware_interpolation_for_f32=True): + from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField + # TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields + + assert not(implement_by_texture_accesses and vectorize), \ + "can only implement interpolations either by texture accesses or CPU vectorization" + FLOAT32_T = create_type('float32') + + interpolation_accesses = ast_node.atoms(InterpolatorAccess) + + def can_use_hw_interpolation(i): + return use_hardware_interpolation_for_f32 and i.dtype == FLOAT32_T and isinstance(i, TextureAccess) + + if implement_by_texture_accesses: + + interpolators = {a.symbol.interpolator for a in interpolation_accesses} + to_texture_map = {i: TextureCachedField.from_interpolator(i) for i in interpolators} + + substitutions = {i: to_texture_map[i.symbol.interpolator].at( + [o for o in i.offsets]) for i in interpolation_accesses} + + import pycuda.driver as cuda + for texture in substitutions.values(): + if can_use_hw_interpolation(texture): + texture.filter_mode = cuda.filter_mode.LINEAR + else: + texture.filter_mode = cuda.filter_mode.POINT + texture.read_as_integer = True + + if isinstance(ast_node, AssignmentCollection): + ast_node = ast_node.subs(substitutions) + else: + ast_node.subs(substitutions) + + # Update after replacements + interpolation_accesses = ast_node.atoms(InterpolatorAccess) + + if vectorize: + # TODO can be done in _interpolator_access_to_stencils field.absolute_access == simd_gather + raise NotImplementedError() + else: + substitutions = {i: i.implementation_with_stencils() + for i in interpolation_accesses if not can_use_hw_interpolation(i)} + if isinstance(ast_node, AssignmentCollection): + ast_node = ast_node.subs(substitutions) + else: + ast_node.subs(substitutions) + + return ast_node diff --git a/pystencils_tests/test_address_of.py b/pystencils_tests/test_address_of.py index 9aac08d107f3fc1fc69703cb7f348b28fb516f65..b4c46c1c6045520e30826fe84ac91ae76339ad1b 100644 --- a/pystencils_tests/test_address_of.py +++ b/pystencils_tests/test_address_of.py @@ -3,17 +3,17 @@ Test of pystencils.data_types.address_of """ import pystencils -from pystencils.data_types import PointerType, address_of, cast_func +from pystencils.data_types import PointerType, address_of, cast_func, create_type from pystencils.simp.simplifications import sympy_cse def test_address_of(): x, y = pystencils.fields('x,y: int64[2d]') - s = pystencils.TypedSymbol('s', PointerType('int64')) + s = pystencils.TypedSymbol('s', PointerType(create_type('int64'))) assignments = pystencils.AssignmentCollection({ s: address_of(x[0, 0]), - y[0, 0]: cast_func(s, 'int64') + y[0, 0]: cast_func(s, create_type('int64')) }, {}) ast = pystencils.create_kernel(assignments) @@ -21,7 +21,7 @@ def test_address_of(): print(code) assignments = pystencils.AssignmentCollection({ - y[0, 0]: cast_func(address_of(x[0, 0]), 'int64') + y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) }, {}) ast = pystencils.create_kernel(assignments) @@ -31,10 +31,11 @@ def test_address_of(): def test_address_of_with_cse(): x, y = pystencils.fields('x,y: int64[2d]') + s = pystencils.TypedSymbol('s', PointerType(create_type('int64'))) assignments = pystencils.AssignmentCollection({ - y[0, 0]: cast_func(address_of(x[0, 0]), 'int64'), - x[0, 0]: cast_func(address_of(x[0, 0]), 'int64') + 1 + y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + s, + x[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + 1 }, {}) ast = pystencils.create_kernel(assignments) diff --git a/pystencils_tests/test_data/lenna.png b/pystencils_tests/test_data/lenna.png new file mode 100644 index 0000000000000000000000000000000000000000..59ef68aabd033341028ccd2b1f6c5871c02b9d7b Binary files /dev/null and b/pystencils_tests/test_data/lenna.png differ diff --git a/pystencils_tests/test_data/test_vessel2d_mask.png b/pystencils_tests/test_data/test_vessel2d_mask.png new file mode 100644 index 0000000000000000000000000000000000000000..1e86b699b10cd8749f899a7600cd0a46fbe98b7b Binary files /dev/null and b/pystencils_tests/test_data/test_vessel2d_mask.png differ diff --git a/pystencils_tests/test_field_coordinates.py b/pystencils_tests/test_field_coordinates.py new file mode 100644 index 0000000000000000000000000000000000000000..1aeed343bc0c77d6e6b56b3d48e68c100be3f56c --- /dev/null +++ b/pystencils_tests/test_field_coordinates.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" +from os.path import dirname, join + +import numpy as np +import sympy + +import pystencils +from pystencils.interpolation_astnodes import LinearInterpolator + +try: + import pyconrad.autoinit +except Exception: + import unittest.mock + pyconrad = unittest.mock.MagicMock() + +LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png') + +try: + import skimage.io + lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float32) +except Exception: + lenna = np.random.rand(20, 30) + + +def test_rotate_center(): + x, y = pystencils.fields('x, y: float32[2d]') + + # Rotate around center when setting coordindates origins to field centers + x.set_coordinate_origin_to_field_center() + y.set_coordinate_origin_to_field_center() + + rotation_angle = sympy.pi / 5 + transform_matrix = sympy.rot_axis3(rotation_angle)[:2, :2] + + # Generic matrix transform works like that (for rotation it would be more clever to use transform_matrix.T) + inverse_matrix = transform_matrix.inv() + input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates) + + assignments = pystencils.AssignmentCollection({ + y.center(): LinearInterpolator(x).at(input_coordinate) + }) + + kernel = pystencils.create_kernel(assignments).compile() + rotated = np.zeros_like(lenna) + + kernel(x=lenna, y=rotated) + + pyconrad.imshow(lenna, "lenna") + pyconrad.imshow(rotated, "rotated") + + # If distance in input field is twice as close we will see a smaller image + x.coordinate_transform /= 2 + + input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates) + + assignments = pystencils.AssignmentCollection({ + y.center(): LinearInterpolator(x).at(input_coordinate) + }) + + kernel = pystencils.create_kernel(assignments).compile() + rotated = np.zeros_like(lenna) + + kernel(x=lenna, y=rotated) + + pyconrad.imshow(rotated, "rotated smaller") + + # Conversely, if output field has samples 3 times closer we will see a bigger image + y.coordinate_transform /= 3 + + input_coordinate = x.physical_to_index(inverse_matrix @ y.physical_coordinates) + + assignments = pystencils.AssignmentCollection({ + y.center(): LinearInterpolator(x).at(input_coordinate) + }) + + kernel = pystencils.create_kernel(assignments).compile() + rotated = np.zeros_like(lenna) + + kernel(x=lenna, y=rotated) + + pyconrad.imshow(rotated, "rotated bigger") + + # coordinate_transform can be any matrix, also with symbols as entries + + +def main(): + test_rotate_center() + + +if __name__ == '__main__': + main() diff --git a/pystencils_tests/test_global_definitions.py b/pystencils_tests/test_global_definitions.py index 52def8c6b4300ef5afdeab617ac1b0807562f5c3..c08557018feb49fe7d8d6beeaa9e5ab0e43e99f5 100644 --- a/pystencils_tests/test_global_definitions.py +++ b/pystencils_tests/test_global_definitions.py @@ -14,7 +14,7 @@ class BogusDeclaration(pystencils.astnodes.Node): @property def args(self): """Returns all arguments/children of this node.""" - raise set() + return set() @property def symbols_defined(self): diff --git a/pystencils_tests/test_interpolation.py b/pystencils_tests/test_interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..02fb2c3934c2514bb804c46198e4352daa3307c5 --- /dev/null +++ b/pystencils_tests/test_interpolation.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" +from os.path import dirname, join + +import numpy as np +import sympy + +import pycuda.autoinit # NOQA +import pycuda.gpuarray as gpuarray +import pystencils +from pystencils.interpolation_astnodes import LinearInterpolator +from pystencils.spatial_coordinates import x_, y_ + +type_map = { + np.float32: 'float32', + np.float64: 'float64', + np.int32: 'int32', +} + +try: + import pyconrad.autoinit +except Exception: + import unittest.mock + pyconrad = unittest.mock.MagicMock() + +LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png') + +try: + import skimage.io + lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float64) + pyconrad.imshow(lenna) +except Exception: + lenna = np.random.rand(20, 30) + + +def test_interpolation(): + x_f, y_f = pystencils.fields('x,y: float64 [2d]') + + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f).at([x_ + 2.7, y_ + 7.2]) + }) + print(assignments) + ast = pystencils.create_kernel(assignments) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + pyconrad.imshow(lenna) + + out = np.zeros_like(lenna) + kernel(x=lenna, y=out) + pyconrad.imshow(out, "out") + + +def test_scale_interpolation(): + x_f, y_f = pystencils.fields('x,y: float64 [2d]') + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at([0.5 * x_ + 2.7, 0.25 * y_ + 7.2]) + }) + print(assignments) + ast = pystencils.create_kernel(assignments) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = np.zeros_like(lenna) + kernel(x=lenna, y=out) + pyconrad.imshow(out, "out " + address_mode) + + +def test_rotate_interpolation(): + x_f, y_f = pystencils.fields('x,y: float64 [2d]') + + rotation_angle = sympy.pi / 5 + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + + transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + print(assignments) + ast = pystencils.create_kernel(assignments) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = np.zeros_like(lenna) + kernel(x=lenna, y=out) + pyconrad.imshow(out, "out " + address_mode) + + +def test_rotate_interpolation_gpu(): + + rotation_angle = sympy.pi / 5 + scale = 1 + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + previous_result = None + for dtype in [np.int32, np.float32, np.float64]: + if dtype == np.int32: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna * 255, dtype)) + else: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna, dtype)) + for use_textures in [True, False]: + x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) + + transformed = scale * \ + sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2]) + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + print(assignments) + ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = gpuarray.zeros_like(lenna_gpu) + kernel(x=lenna_gpu, y=out) + pyconrad.imshow(out, + f"out {address_mode} texture:{use_textures} {type_map[dtype]}") + skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", + np.ascontiguousarray(out.get(), np.float32)) + if previous_result is not None: + try: + assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3) + except AssertionError as e: # NOQA + print("Max error: %f" % np.max(previous_result - out.get())) + # pyconrad.imshow(previous_result - out.get(), "Difference image") + # raise e + previous_result = out.get() + + +def test_shift_interpolation_gpu(): + + rotation_angle = 0 # sympy.pi / 5 + scale = 1 + # shift = - sympy.Matrix([1.5, 1.5]) + shift = sympy.Matrix((0.0, 0.0)) + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + previous_result = None + for dtype in [np.float64, np.float32, np.int32]: + if dtype == np.int32: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna * 255, dtype)) + else: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna, dtype)) + for use_textures in [True, False]: + x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) + + if use_textures: + transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift + else: + transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + # print(assignments) + ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) + # print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = gpuarray.zeros_like(lenna_gpu) + kernel(x=lenna_gpu, y=out) + pyconrad.imshow(out, + f"out {address_mode} texture:{use_textures} {type_map[dtype]}") + skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", + np.ascontiguousarray(out.get(), np.float32)) + # if not (use_single_precision and use_textures): + # if previous_result is not None: + # try: + # assert np.allclose(previous_result[4:-4, 4:-4], out.get() + # [4:-4, 4:-4], rtol=1e-3, atol=1e-2) + # except AssertionError as e: + # print("Max error: %f" % np.max(np.abs(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4]))) + # pyconrad.imshow(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4], "Difference image") + # raise e + # previous_result = out.get() + + +def test_rotate_interpolation_size_change(): + x_f, y_f = pystencils.fields('x,y: float64 [2d]') + + rotation_angle = sympy.pi / 5 + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + + transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + print(assignments) + ast = pystencils.create_kernel(assignments) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = np.zeros((100, 150), np.float64) + kernel(x=lenna, y=out) + pyconrad.imshow(out, "small out " + address_mode) + + +def test_field_interpolated(): + x_f, y_f = pystencils.fields('x,y: float64 [2d]') + + for address_mode in ['border', 'wrap', 'clamp', 'mirror']: + assignments = pystencils.AssignmentCollection({ + y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode) + }) + print(assignments) + ast = pystencils.create_kernel(assignments) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = np.zeros_like(lenna) + kernel(x=lenna, y=out) + pyconrad.imshow(out, "out " + address_mode) diff --git a/pystencils_tests/test_json_backend.py b/pystencils_tests/test_json_backend.py new file mode 100644 index 0000000000000000000000000000000000000000..bc9765c477d667c129195dfa7cdc2565e7e84e38 --- /dev/null +++ b/pystencils_tests/test_json_backend.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" + +import sympy + +import pystencils +from pystencils.backends.json import print_json + + +def test_json_backend(): + + z, y, x = pystencils.fields("z, y, x: [20,40]") + a = sympy.Symbol('a') + + assignments = pystencils.AssignmentCollection({ + z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0]) + }) + + ast = pystencils.create_kernel(assignments) + + print(print_json(ast)) diff --git a/pystencils_tests/test_source_code_comment.py b/pystencils_tests/test_source_code_comment.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc66feb6f7f92174029e274feee22dfc4455b8d --- /dev/null +++ b/pystencils_tests/test_source_code_comment.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# +# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de> +# +# Distributed under terms of the GPLv3 license. + +""" + +""" +import pystencils +import pystencils.astnodes + + +def test_source_code_comment(): + + a, b = pystencils.fields('a,b: float[2D]') + + assignments = pystencils.AssignmentCollection( + {a.center(): b[0, 2] + b[0, 0]}, {} + ) + + ast = pystencils.create_kernel(assignments, target='cpu') + + ast.body.append(pystencils.astnodes.SourceCodeComment("Hallo")) + ast.body.append(pystencils.astnodes.EmptyLine()) + ast.body.append(pystencils.astnodes.SourceCodeComment("World!")) + print(ast) + compiled = ast.compile() + assert compiled is not None + print(pystencils.show_code(ast))