Commit 8cf7aa27 authored by Michael Kuron's avatar Michael Kuron
Browse files

Merge branch 'master' of i10git.cs.fau.de:pycodegen/pystencils into oldsympy

parents eb1c6ea6 1cabb407
......@@ -10,11 +10,12 @@ from sympy.printing.ccode import C89CodePrinter
from pystencils.astnodes import KernelFunction, Node
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func,
vector_memory_access)
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
reinterpret_cast_func, vector_memory_access)
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.integer_functions import (
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil)
bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
int_div, int_power_of_2, modulo_ceil)
try:
from sympy.printing.ccode import C99CodePrinter as CCodePrinter
......@@ -91,7 +92,7 @@ def get_global_declarations(ast):
visit_node(ast)
return sorted(set(global_declarations), key=lambda x: str(x))
return sorted(set(global_declarations), key=str)
def get_headers(ast_node: Node) -> Set[str]:
......
......@@ -3,7 +3,7 @@ from os.path import dirname, join
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.interpolation_astnodes import InterpolationMode
from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode
with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
lines = f.readlines()
......@@ -70,10 +70,13 @@ class CudaSympyPrinter(CustomSympyPrinter):
super(CudaSympyPrinter, self).__init__()
self.known_functions.update(CUDA_KNOWN_FUNCTIONS)
def _print_TextureAccess(self, node):
dtype = node.texture.field.dtype.numpy_dtype
def _print_InterpolatorAccess(self, node):
dtype = node.interpolator.field.dtype.numpy_dtype
if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
if type(node) == DiffInterpolatorAccess:
# cubicTex3D_1st_derivative_x(texture tex, float3 coord)
template = f"cubicTex%iD_1st_derivative_{list(reversed('xyz'[:node.ndim]))[node.diff_coordinate_idx]}(%s, %s)" # noqa
elif node.interpolator.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
template = "cubicTex%iDSimple(%s, %s)"
else:
if dtype.itemsize > 4:
......@@ -84,8 +87,8 @@ class CudaSympyPrinter(CustomSympyPrinter):
template = "tex%iD(%s, %s)"
code = template % (
node.texture.field.spatial_dimensions,
str(node.texture),
node.interpolator.field.spatial_dimensions,
str(node.interpolator),
# + 0.5 comes from Nvidia's staggered indexing
', '.join(self._print(o + 0.5) for o in reversed(node.offsets))
)
......
......@@ -111,6 +111,16 @@ class Diff(sp.Expr):
def __str__(self):
return "D(%s)" % self.arg
def interpolated_access(self, offset, **kwargs):
"""Represents an interpolated access on a spatially differentiated field
Args:
offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative
"""
from pystencils.interpolation_astnodes import DiffInterpolatorAccess
assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses"
return DiffInterpolatorAccess(self.arg.field.interpolated_access(offset, **kwargs).symbol, self.target, *offset)
class DiffOperator(sp.Expr):
"""Un-applied differential, i.e. differential operator
......
......@@ -109,7 +109,9 @@ class Discretization2ndOrder:
return self._discretize_advection(e)
elif isinstance(e, Diff):
arg, *indices = diff_args(e)
if not isinstance(arg, Field.Access):
from pystencils.interpolation_astnodes import InterpolatorAccess
if not isinstance(arg, (Field.Access, InterpolatorAccess)):
raise ValueError("Only derivatives with field or field accesses as arguments can be discretized")
return self.spatial_stencil(indices, self.dx, arg)
else:
......
......@@ -332,7 +332,7 @@ class Field(AbstractField):
self.latex_name: Optional[str] = None
self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple(
0 for _ in range(self.spatial_dimensions)
)) # type
))
self.coordinate_transform = sp.eye(self.spatial_dimensions)
if field_type == FieldType.STAGGERED:
assert self.staggered_stencil
......
......@@ -5,13 +5,20 @@ from pystencils.data_types import StructType
from pystencils.field import FieldType
from pystencils.gpucuda.texture_utils import ndarray_to_tex
from pystencils.include import get_pycuda_include_path, get_pystencils_include_path
from pystencils.interpolation_astnodes import TextureAccess
from pystencils.interpolation_astnodes import InterpolatorAccess, TextureCachedField
from pystencils.kernel_wrapper import KernelWrapper
from pystencils.kernelparameters import FieldPointerSymbol
USE_FAST_MATH = True
def get_cubic_interpolation_include_paths():
from os.path import join, dirname
return [join(dirname(__file__), "CubicInterpolationCUDA", "code"),
join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None):
"""
Creates a kernel function from an abstract syntax tree which
......@@ -39,23 +46,28 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
code += "#define FUNC_PREFIX __global__\n"
code += "#define RESTRICT __restrict__\n\n"
code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend))
textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess))
textures = set(d.interpolator for d in kernel_function_node.atoms(
InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField))
nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
if USE_FAST_MATH:
nvcc_options.append("-use_fast_math")
# Code for
# if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
# assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
# "Submodule CubicInterpolationCUDA does not exist"
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
# needed_dims = set(t.field.spatial_dimensions for t in textures
# if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
# for i in needed_dims:
# code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
# Code for CubicInterpolationCUDA
from pystencils.interpolation_astnodes import InterpolationMode
from os.path import join, dirname, isdir
if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
assert isdir(join(dirname(__file__), ("CubicInterpolationCUDA", "code")),
"Submodule CubicInterpolationCUDA does not exist.\n"
+ "Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda")
nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
needed_dims = set(t.field.spatial_dimensions for t in textures
if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
for i in needed_dims:
code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
mod = SourceModule(code, options=nvcc_options, include_dirs=[
get_pystencils_include_path(), get_pycuda_include_path()])
......
......@@ -15,6 +15,7 @@ import numpy as np
try:
import pycuda.driver as cuda
from pycuda import gpuarray
import pycuda
except Exception:
pass
......@@ -35,6 +36,8 @@ def ndarray_to_tex(tex_ref,
use_normalized_coordinates=False,
read_as_integer=False):
if isinstance(address_mode, str):
address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
if address_mode is None:
address_mode = cuda.address_mode.BORDER
if filter_mode is None:
......
......@@ -35,6 +35,27 @@ class InterpolationMode(str, Enum):
CUBIC_SPLINE = "cubic_spline"
class _InterpolationSymbol(TypedSymbol):
def __new__(cls, name, field, interpolator):
obj = cls.__xnew_cached_(cls, name, field, interpolator)
return obj
def __new_stage2__(cls, name, field, interpolator):
obj = super().__xnew__(cls, name, 'dummy_symbol_carrying_field' + field.name)
obj.field = field
obj.interpolator = interpolator
return obj
def __getnewargs__(self):
return self.name, self.field, self.interpolator
# noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__)
# noinspection SpellCheckingInspection
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
class Interpolator(object):
"""
Implements non-integer accesses on fields using linear interpolation.
......@@ -77,24 +98,25 @@ class Interpolator(object):
allow_textures=True):
super().__init__()
self.field = parent_field.new_field_with_different_name(parent_field.name)
self.field = parent_field
self.field.field_type = pystencils.field.FieldType.CUSTOM
self.address_mode = address_mode
self.use_normalized_coordinates = use_normalized_coordinates
hash_str = "%x" % abs(hash(self.field) + hash(address_mode))
self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str,
'dummy_symbol_carrying_field' + self.field.name + hash_str)
self.symbol.field = self.field
self.symbol.interpolator = self
self.allow_textures = allow_textures
self.interpolation_mode = interpolation_mode
self.hash_str = hashlib.md5(
f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest()
self.symbol = _InterpolationSymbol(str(self), parent_field, self)
self.allow_textures = allow_textures
@property
def ndim(self):
return self.field.ndim
@property
def _hashable_contents(self):
return (str(self.address_mode),
str(type(self)),
self.symbol,
self.address_mode,
self.hash_str,
self.use_normalized_coordinates)
def at(self, offset):
......@@ -112,6 +134,9 @@ class Interpolator(object):
def __hash__(self):
return hash(self._hashable_contents)
def __eq__(self, other):
return hash(self) == hash(other)
@property
def reproducible_hash(self):
return _hash(str(self._hashable_contents).encode()).hexdigest()
......@@ -142,29 +167,43 @@ class NearestNeightborInterpolator(Interpolator):
class InterpolatorAccess(TypedSymbol):
def __new__(cls, field, offsets, *args, **kwargs):
obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs)
def __new__(cls, field, *offsets, **kwargs):
obj = InterpolatorAccess.__xnew_cached_(cls, field, *offsets, **kwargs)
return obj
def __new_stage2__(self, symbol, *offsets):
def __new_stage2__(cls, symbol, *offsets):
assert offsets is not None
obj = super().__xnew__(self, '%s_interpolator_%x' %
(symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype)
obj = super().__xnew__(cls, '%s_interpolator_%s' %
(symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()),
symbol.field.dtype)
obj.offsets = offsets
obj.symbol = symbol
obj.field = symbol.field
obj.interpolator = symbol.interpolator
return obj
def __hash__(self):
return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator))
def _hashable_contents(self):
return super()._hashable_content() + ((self.symbol, self.field, tuple(self.offsets), self.symbol.interpolator))
def __str__(self):
return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets))
return '%s_interpolator(%s)' % (self.field.name, ', '.join(str(o) for o in self.offsets))
def __repr__(self):
return self.__str__()
def _latex(self, printer, *_):
n = self.field.latex_name if self.field.latex_name else self.field.name
foo = ", ".join(str(printer.doprint(o)) for o in self.offsets)
return f'{n}_{{interpolator}}\\left({foo}\\right)'
@property
def ndim(self):
return len(self.offsets)
@property
def is_texture(self):
return isinstance(self.interpolator, TextureCachedField)
def atoms(self, *types):
if self.offsets:
offsets = set(o for o in self.offsets if isinstance(o, types))
......@@ -177,6 +216,11 @@ class InterpolatorAccess(TypedSymbol):
else:
return set()
def neighbor(self, coord_id, offset):
offset_list = list(self.offsets)
offset_list[coord_id] += offset
return self.interpolator.at(tuple(offset_list))
@property
def free_symbols(self):
symbols = set()
......@@ -189,6 +233,13 @@ class InterpolatorAccess(TypedSymbol):
return symbols
@property
def required_global_declarations(self):
required_global_declarations = self.symbol.interpolator.required_global_declarations
if required_global_declarations:
required_global_declarations[0]._symbols_defined.add(self)
return required_global_declarations
@property
def args(self):
return [self.symbol, *self.offsets]
......@@ -201,14 +252,27 @@ class InterpolatorAccess(TypedSymbol):
def interpolation_mode(self):
return self.interpolator.interpolation_mode
@property
def _diff_interpolation_vec(self):
return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets)
for i in range(len(self.offsets))])
def diff(self, *symbols, **kwargs):
if symbols == (self,):
return 1
rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs)
if rtn.shape == (1, 1):
rtn = rtn[0, 0]
return rtn
def implementation_with_stencils(self):
field = self.field
default_int_type = create_type('int64')
use_textures = isinstance(self, TextureAccess)
use_textures = isinstance(self.interpolator, TextureCachedField)
if use_textures:
def absolute_access(x, _):
return self.texture.at((o for o in x))
return self.symbol.interpolator.at((o for o in x))
else:
absolute_access = field.absolute_access
......@@ -255,7 +319,7 @@ class InterpolatorAccess(TypedSymbol):
for (dim, i) in enumerate(index)]
index = [cast_func(sp.Piecewise((i, i > 0),
(sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)),
True)), default_int_type)
True)), default_int_type)
for (dim, i) in enumerate(index)]
sum[channel_idx] += weight * \
absolute_access(index, channel_idx if field.index_dimensions else ())
......@@ -287,12 +351,61 @@ class InterpolatorAccess(TypedSymbol):
# noinspection SpellCheckingInspection
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
def __getnewargs__(self):
return tuple(self.symbol, *self.offsets)
class DiffInterpolatorAccess(InterpolatorAccess):
def __new__(cls, symbol, diff_coordinate_idx, *offsets, **kwargs):
if symbol.interpolator.interpolation_mode == InterpolationMode.LINEAR:
from pystencils.fd import Diff, Discretization2ndOrder
return Discretization2ndOrder(1)(Diff(symbol.interpolator.at(offsets), diff_coordinate_idx))
obj = DiffInterpolatorAccess.__xnew_cached_(cls, symbol, diff_coordinate_idx, *offsets, **kwargs)
return obj
def __new_stage2__(self, symbol: sp.Symbol, diff_coordinate_idx, *offsets):
assert offsets is not None
obj = super().__xnew__(self, symbol, *offsets)
obj.diff_coordinate_idx = diff_coordinate_idx
return obj
def __hash__(self):
return hash((self.symbol, self.field, self.diff_coordinate_idx, tuple(self.offsets), self.interpolator))
def __str__(self):
return '%s_diff%i_interpolator(%s)' % (self.field.name, self.diff_coordinate_idx,
', '.join(str(o) for o in self.offsets))
def __repr__(self):
return str(self)
@property
def args(self):
return [self.symbol, self.diff_coordinate_idx, *self.offsets]
@property
def symbols_defined(self) -> Set[sp.Symbol]:
return {self}
@property
def interpolation_mode(self):
return self.interpolator.interpolation_mode
# noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__)
# noinspection SpellCheckingInspection
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
def __getnewargs__(self):
return tuple(self.symbol, self.diff_coordinate_idx, *self.offsets)
##########################################################################################
# GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols #
##########################################################################################
class TextureCachedField:
class TextureCachedField(Interpolator):
def __init__(self, parent_field,
address_mode=None,
......@@ -301,28 +414,19 @@ class TextureCachedField:
use_normalized_coordinates=False,
read_as_integer=False
):
if isinstance(address_mode, str):
address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
super().__init__(parent_field, interpolation_mode, address_mode, use_normalized_coordinates)
if address_mode is None:
address_mode = pycuda.driver.address_mode.BORDER
address_mode = 'border'
if filter_mode is None:
filter_mode = pycuda.driver.filter_mode.LINEAR
# self, field_name, field_type, dtype, layout, shape, strides
self.field = parent_field
self.address_mode = address_mode
self.filter_mode = filter_mode
self.read_as_integer = read_as_integer
self.use_normalized_coordinates = use_normalized_coordinates
self.interpolation_mode = interpolation_mode
self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype)
self.symbol.interpolator = self
self.symbol.field = self.field
self.required_global_declarations = [TextureDeclaration(self)]
# assert str(self.field.dtype) != 'double', "CUDA does not support double textures!"
# assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less"
@property
def ndim(self):
return self.field.ndim
@classmethod
def from_interpolator(cls, interpolator: LinearInterpolator):
......@@ -332,59 +436,17 @@ class TextureCachedField:
obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode)
return obj
def at(self, offset):
return TextureAccess(self.symbol, *offset)
def __getitem__(self, offset):
return TextureAccess(self.symbol, *offset)
def __str__(self):
return '%s_texture_%s' % (self.field.name, self.reproducible_hash)
def __repr__(self):
return self.__str__()
@property
def _hashable_contents(self):
return (type(self),
self.address_mode,
self.filter_mode,
self.read_as_integer,
self.interpolation_mode,
self.use_normalized_coordinates)
def __hash__(self):
return hash(self._hashable_contents)
@property
def reproducible_hash(self):
return _hash(str(self._hashable_contents).encode()).hexdigest()
class TextureAccess(InterpolatorAccess):
def __new__(cls, texture_symbol, offsets, *args, **kwargs):
obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs)
return obj
def __new_stage2__(self, symbol, *offsets):
obj = super().__xnew__(self, symbol, *offsets)
obj.required_global_declarations = symbol.interpolator.required_global_declarations
obj.required_global_declarations[0]._symbols_defined.add(obj)
return obj
def __str__(self):
return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets))
@property
def texture(self):
return self.interpolator
# noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__)
# noinspection SpellCheckingInspection
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
class TextureDeclaration(Node):
"""
A global declaration of a texture. Visible both for device and host code.
......@@ -421,12 +483,18 @@ class TextureDeclaration(Node):
@property
def headers(self):
return ['"pycuda-helpers.hpp"']
headers = ['"pycuda-helpers.hpp"']
if self.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
headers.append('"cubicTex%iD.cu"' % self.texture.ndim)
return headers
def __str__(self):
from pystencils.backends.cuda_backend import CudaBackend
return CudaBackend()(self)
def __repr__(self):
return str(self)
class TextureObject(TextureDeclaration):
"""
......
......@@ -44,3 +44,13 @@ def optimize_assignments(assignments, optimizations):
a.optimize(optimizations)
return assignments
def optimize_ast(ast, optimizations):
if HAS_REWRITING:
assignments_nodes = ast.atoms(SympyAssignment)
for a in assignments_nodes:
a.optimize(optimizations)
return ast
......@@ -156,6 +156,9 @@ class AssignmentCollection:
"""See :func:`count_operations` """
return count_operations(self.all_assignments, only_type=None)
def atoms(self, *args):
return set().union(*[a.atoms(*args) for a in self.all_assignments])
def dependent_symbols(self, symbols: Iterable[sp.Symbol]) -> Set[sp.Symbol]:
"""Returns all symbols that depend on one of the passed symbols.
......
......@@ -14,8 +14,8 @@ import pystencils.astnodes as ast
import pystencils.integer_functions
from pystencils.assignment import Assignment
from pystencils.data_types import (
PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type,
get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type,
get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
from pystencils.field import AbstractField, Field, FieldType
from pystencils.kernelparameters import FieldPointerSymbol
from pystencils.simp.assignment_collection import AssignmentCollection
......@@ -1314,7 +1314,7 @@ def implement_interpolations(ast_node: ast.Node,
implement_by_texture_accesses: bool = False,
vectorize: bool = False,
use_hardware_interpolation_for_f32=True):
from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField
from pystencils.interpolation_astnodes import (InterpolatorAccess, TextureCachedField)
# TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields
assert not(implement_by_texture_accesses and vectorize), \
......@@ -1322,37 +1322,42 @@ def implement_interpolations(ast_node: ast.Node,
FLOAT32_T = create_type('float32')