Commit 1c0665c4 authored by Stephan Seitz's avatar Stephan Seitz
Browse files

Implement interpolation (without CubicInterpolationCUDA)

parent e871e864
......@@ -103,6 +103,10 @@ def get_headers(ast_node: Node) -> Set[str]:
if isinstance(a, Node):
headers.update(get_headers(a))
for g in get_global_declarations(ast_node):
if isinstance(g, Node):
headers.update(get_headers(g))
return sorted(headers)
......
......@@ -3,6 +3,7 @@ from os.path import dirname, join
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.interpolation_astnodes import InterpolationMode
with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
lines = f.readlines()
......@@ -43,6 +44,14 @@ class CudaBackend(CBackend):
return code
def _print_TextureDeclaration(self, node):
if node.texture.field.dtype.numpy_dtype.itemsize > 4:
code = "texture<fp_tex_%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
str(node.texture.field.dtype),
node.texture.field.spatial_dimensions,
node.texture
)
else:
code = "texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;" % (
str(node.texture.field.dtype),
node.texture.field.spatial_dimensions,
......@@ -62,17 +71,23 @@ class CudaSympyPrinter(CustomSympyPrinter):
self.known_functions.update(CUDA_KNOWN_FUNCTIONS)
def _print_TextureAccess(self, node):
dtype = node.texture.field.dtype.numpy_dtype
if node.texture.cubic_bspline_interpolation:
template = "cubicTex%iDSimple<%s>(%s, %s)"
if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
template = "cubicTex%iDSimple(%s, %s)"
else:
template = "tex%iD<%s>(%s, %s)"
if dtype.itemsize > 4:
# Use PyCuda hack!
# https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
template = "fp_tex%iD(%s, %s)"
else:
template = "tex%iD(%s, %s)"
code = template % (
node.texture.field.spatial_dimensions,
str(node.texture.field.dtype),
str(node.texture),
', '.join(self._print(o) for o in node.offsets)
# + 0.5 comes from Nvidia's staggered indexing
', '.join(self._print(o + 0.5) for o in reversed(node.offsets))
)
return code
......
......@@ -45,6 +45,7 @@ tex1D
tex2D
tex3D
sqrtf
rsqrtf
cbrtf
rcbrtf
......
......@@ -10,8 +10,8 @@ from pystencils.data_types import BasicType, StructType, TypedSymbol, create_typ
from pystencils.field import Field, FieldType
from pystencils.transformations import (
add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering,
make_loop_over_domain, move_constants_before_loop, parse_base_pointer_info,
resolve_buffer_accesses, resolve_field_accesses, split_inner_loop)
implement_interpolations, make_loop_over_domain, move_constants_before_loop,
parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, split_inner_loop)
AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]]
......@@ -67,6 +67,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
ghost_layers=ghost_layers, loop_order=loop_order)
ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name)
implement_interpolations(body)
if split_groups:
typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
......@@ -139,6 +140,8 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body = Block([])
loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0, start=0, stop=index_fields[0].shape[0])
implement_interpolations(loop_node)
for assignment in assignments:
loop_body.append(assignment)
......
import ctypes
from collections import defaultdict
from functools import partial
from typing import Tuple
import numpy as np
import sympy as sp
import sympy.codegen.ast
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean
import pystencils
from pystencils.cache import memorycache, memorycache_if_hashable
from pystencils.utils import all_equal
......@@ -17,6 +20,26 @@ except ImportError as e:
_ir_importerror = e
def typed_symbols(names, dtype, *args):
symbols = sp.symbols(names, *args)
if isinstance(symbols, Tuple):
return tuple(TypedSymbol(str(s), dtype) for s in symbols)
else:
return TypedSymbol(str(symbols), dtype)
def matrix_symbols(names, dtype, rows, cols):
if isinstance(names, str):
names = names.replace(' ', '').split(',')
matrices = []
for n in names:
symbols = typed_symbols("%s:%i" % (n, rows * cols), dtype)
matrices.append(sp.Matrix(rows, cols, lambda i, j: symbols[i * cols + j]))
return tuple(matrices)
# noinspection PyPep8Naming
class address_of(sp.Function):
is_Atom = True
......@@ -86,6 +109,11 @@ class cast_func(sp.Function):
@property
def is_integer(self):
"""
Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate
For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
"""
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer
else:
......@@ -93,6 +121,9 @@ class cast_func(sp.Function):
@property
def is_negative(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if hasattr(self.dtype, 'numpy_dtype'):
if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger):
return False
......@@ -101,6 +132,9 @@ class cast_func(sp.Function):
@property
def is_nonnegative(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if self.is_negative is False:
return True
else:
......@@ -108,6 +142,9 @@ class cast_func(sp.Function):
@property
def is_real(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \
np.issubdtype(self.dtype.numpy_dtype, np.floating) or \
......@@ -171,6 +208,11 @@ class TypedSymbol(sp.Symbol):
# For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
@property
def is_integer(self):
"""
Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate
For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
"""
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer
else:
......@@ -178,6 +220,9 @@ class TypedSymbol(sp.Symbol):
@property
def is_negative(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if hasattr(self.dtype, 'numpy_dtype'):
if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger):
return False
......@@ -186,6 +231,9 @@ class TypedSymbol(sp.Symbol):
@property
def is_nonnegative(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if self.is_negative is False:
return True
else:
......@@ -193,6 +241,9 @@ class TypedSymbol(sp.Symbol):
@property
def is_real(self):
"""
See :func:`.TypedSymbol.is_integer`
"""
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \
np.issubdtype(self.dtype.numpy_dtype, np.floating) or \
......@@ -370,12 +421,17 @@ def peel_off_type(dtype, type_to_peel_off):
return dtype
def collate_types(types):
def collate_types(types, forbid_collation_to_float=False):
"""
Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double
Uses the collation rules from numpy.
"""
if forbid_collation_to_float:
types = [t for t in types if not (hasattr(t, 'is_float') and t.is_float())]
if not types:
return [create_type('int32')]
# Pointer arithmetic case i.e. pointer + integer is allowed
if any(type(t) is PointerType for t in types):
pointer_type = None
......@@ -433,6 +489,8 @@ def get_type_of_expression(expr,
return create_type(default_float_type)
elif isinstance(expr, ResolvedFieldAccess):
return expr.field.dtype
elif isinstance(expr, pystencils.field.Field.AbstractAccess):
return expr.field.dtype
elif isinstance(expr, TypedSymbol):
return expr.dtype
elif isinstance(expr, sp.Symbol):
......@@ -525,6 +583,10 @@ class BasicType(Type):
def numpy_dtype(self):
return self._dtype
@property
def sympy_dtype(self):
return getattr(sympy.codegen.ast, str(self.numpy_dtype))
@property
def item_size(self):
return 1
......
import functools
import hashlib
import operator
import pickle
import re
from enum import Enum
......@@ -9,6 +11,7 @@ import numpy as np
import sympy as sp
from sympy.core.cache import cacheit
import pystencils
from pystencils.alignedarray import aligned_empty
from pystencils.data_types import StructType, TypedSymbol, create_type
from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol
......@@ -38,7 +41,6 @@ def fields(description=None, index_dimensions=0, layout=None, **kwargs):
>>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
>>> assert v.index_shape == (2,)
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1, f2]
......@@ -292,6 +294,10 @@ class Field(AbstractField):
self.shape = shape
self.strides = strides
self.latex_name = None # type: Optional[str]
self.coordinate_origin = sp.Matrix(tuple(
0 for _ in range(self.spatial_dimensions)
)) # type: tuple[float,sp.Symbol]
self.coordinate_transform = sp.eye(self.spatial_dimensions)
def new_field_with_different_name(self, new_name):
if self.has_fixed_shape:
......@@ -312,6 +318,9 @@ class Field(AbstractField):
def ndim(self) -> int:
return len(self.shape)
def values_per_cell(self) -> int:
return functools.reduce(operator.mul, self.index_shape, 1)
@property
def layout(self):
return self._layout
......@@ -393,6 +402,27 @@ class Field(AbstractField):
assert FieldType.is_custom(self)
return Field.Access(self, offset, index, is_absolute_access=True)
def interpolated_access(self,
offset: Tuple,
interpolation_mode='linear',
address_mode='BORDER',
allow_textures=True):
"""Provides access to field values at non-integer positions
``interpolated_access`` is similar to :func:`Field.absolute_access` except that
it allows non-integer offsets and automatic handling of out-of-bound accesses.
:param offset: Tuple of spatial coordinates (can be floats)
:param interpolation_mode: One of :class:`pystencils.interpolation_astnodes.InterpolationMode`
:param address_mode: How boundaries are handled can be 'border', 'wrap', 'mirror', 'clamp'
:param allow_textures: Allow implementation by texture accesses on GPUs
"""
from pystencils.interpolation_astnodes import Interpolator
return Interpolator(self,
interpolation_mode,
address_mode,
allow_textures=allow_textures).at(offset)
def __call__(self, *args, **kwargs):
center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center)(*args, **kwargs)
......@@ -409,6 +439,34 @@ class Field(AbstractField):
return False
return self.hashable_contents() == other.hashable_contents()
@property
def physical_coordinates(self):
return self.coordinate_transform @ (self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions))
@property
def physical_coordinates_staggered(self):
return self.coordinate_transform @ \
(self.coordinate_origin + pystencils.x_staggered_vector(self.spatial_dimensions))
def index_to_physical(self, index_coordinates, staggered=False):
if staggered:
index_coordinates = sp.Matrix([i + 0.5 for i in index_coordinates])
return self.coordinate_transform @ (self.coordinate_origin + index_coordinates)
def physical_to_index(self, physical_coordinates, staggered=False):
rtn = self.coordinate_transform.inv() @ physical_coordinates - self.coordinate_origin
if staggered:
rtn = sp.Matrix([i - 0.5 for i in rtn])
return rtn
def index_to_staggered_physical_coordinates(self, symbol_vector):
symbol_vector += sp.Matrix([0.5] * self.spatial_dimensions)
return self.create_physical_coordinates(symbol_vector)
def set_coordinate_origin_to_field_center(self):
self.coordinate_origin = -sp.Matrix([i / 2 for i in self.spatial_shape])
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class Access(TypedSymbol, AbstractField.AbstractAccess):
"""Class representing a relative access into a `Field`.
......@@ -429,11 +487,12 @@ class Field(AbstractField):
>>> central_y_component.at_index(0) # change component
v_C^0
"""
def __new__(cls, name, *args, **kwargs):
obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
return obj
def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False):
def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False, dtype=None):
field_name = field.name
offsets_and_index = (*offsets, *idx) if idx is not None else offsets
constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index])
......@@ -484,7 +543,7 @@ class Field(AbstractField):
return obj
def __getnewargs__(self):
return self.field, self.offsets, self.index, self.is_absolute_access
return self.field, self.offsets, self.index, self.is_absolute_access, self.dtype
# noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__)
......@@ -503,7 +562,7 @@ class Field(AbstractField):
if len(idx) != self.field.index_dimensions:
raise ValueError("Wrong number of indices: "
"Got %d, expected %d" % (len(idx), self.field.index_dimensions))
return Field.Access(self.field, self._offsets, idx)
return Field.Access(self.field, self._offsets, idx, dtype=self.dtype)
def __getitem__(self, *idx):
return self.__call__(*idx)
......@@ -562,7 +621,7 @@ class Field(AbstractField):
"""
offset_list = list(self.offsets)
offset_list[coord_id] += offset
return Field.Access(self.field, tuple(offset_list), self.index)
return Field.Access(self.field, tuple(offset_list), self.index, dtype=self.dtype)
def get_shifted(self, *shift) -> 'Field.Access':
"""Returns a new Access with changed spatial coordinates
......@@ -572,7 +631,10 @@ class Field(AbstractField):
>>> f[0,0].get_shifted(1, 1)
f_NE
"""
return Field.Access(self.field, tuple(a + b for a, b in zip(shift, self.offsets)), self.index)
return Field.Access(self.field,
tuple(a + b for a, b in zip(shift, self.offsets)),
self.index,
dtype=self.dtype)
def at_index(self, *idx_tuple) -> 'Field.Access':
"""Returns new Access with changed index.
......@@ -582,7 +644,7 @@ class Field(AbstractField):
>>> f(0).at_index(8)
f_C^8
"""
return Field.Access(self.field, self.offsets, idx_tuple)
return Field.Access(self.field, self.offsets, idx_tuple, dtype=self.dtype)
@property
def is_absolute_access(self) -> bool:
......
......@@ -3,7 +3,9 @@ import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.data_types import StructType
from pystencils.field import FieldType
from pystencils.include import get_pystencils_include_path
from pystencils.gpucuda.texture_utils import ndarray_to_tex
from pystencils.include import get_pycuda_include_path, get_pystencils_include_path
from pystencils.interpolation_astnodes import TextureAccess
from pystencils.kernelparameters import FieldPointerSymbol
USE_FAST_MATH = True
......@@ -29,17 +31,33 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
if argument_dict is None:
argument_dict = {}
header_list = ['<stdint.h>'] + list(get_headers(kernel_function_node))
header_list = ['<cstdint>'] + list(get_headers(kernel_function_node))
includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
code = includes + "\n"
code += "#define FUNC_PREFIX __global__\n"
code += "#define RESTRICT __restrict__\n\n"
code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend))
options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess))
nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
if USE_FAST_MATH:
options.append("-use_fast_math")
mod = SourceModule(code, options=options, include_dirs=[get_pystencils_include_path()])
nvcc_options.append("-use_fast_math")
# Code for
# if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
# assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
# "Submodule CubicInterpolationCUDA does not exist"
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
# needed_dims = set(t.field.spatial_dimensions for t in textures
# if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
# for i in needed_dims:
# code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
mod = SourceModule(code, options=nvcc_options, include_dirs=[
get_pystencils_include_path(), get_pycuda_include_path()])
func = mod.get_function(kernel_function_node.function_name)
parameters = kernel_function_node.get_parameters()
......@@ -63,6 +81,12 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
block_and_thread_numbers['grid'] = tuple(int(i) for i in block_and_thread_numbers['grid'])
# TODO: use texture objects:
# https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
for tex in textures:
tex_ref = mod.get_texref(str(tex))
ndarray_to_tex(tex_ref, full_arguments[tex.field.name], tex.address_mode,
tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
args = _build_numpy_argument_list(parameters, full_arguments)
cache[key] = (args, block_and_thread_numbers)
cache_values.append(kwargs) # keep objects alive such that ids remain unique
......
......@@ -4,12 +4,18 @@ from pystencils.field import Field, FieldType
from pystencils.gpucuda.cudajit import make_python_function
from pystencils.gpucuda.indexing import BlockIndexing
from pystencils.transformations import (
add_types, get_base_buffer_index, get_common_shape, parse_base_pointer_info,
resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
def create_cuda_kernel(assignments, function_name="kernel", type_info=None, indexing_creator=BlockIndexing,
iteration_slice=None, ghost_layers=None, skip_independence_check=False):
add_types, get_base_buffer_index, get_common_shape, implement_interpolations,
parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
def create_cuda_kernel(assignments,
function_name="kernel",
type_info=None,
indexing_creator=BlockIndexing,
iteration_slice=None,
ghost_layers=None,
skip_independence_check=False,
use_textures_for_interpolation=True):
fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written])
......@@ -57,6 +63,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
ast = KernelFunction(block, 'gpu', 'gpucuda', make_python_function, ghost_layers, function_name)
ast.global_variables.update(indexing.index_variables)
implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
base_pointer_spec = [['spatialInner0']]
base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
f.spatial_dimensions, f.index_dimensions)
......@@ -86,8 +94,13 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
return ast
def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel", type_info=None,
coordinate_names=('x', 'y', 'z'), indexing_creator=BlockIndexing):
def created_indexed_cuda_kernel(assignments,
index_fields,
function_name="kernel",
type_info=None,
coordinate_names=('x', 'y', 'z'),
indexing_creator=BlockIndexing,
use_textures_for_interpolation=True):
fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False)
all_fields = fields_read.union(fields_written)
read_only_fields = set([f.name for f in fields_read - fields_written])
......@@ -125,6 +138,8 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel
ast = KernelFunction(function_body, 'gpu', 'gpucuda', make_python_function, None, function_name)
ast.global_variables.update(indexing.index_variables)
implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
coord_mapping = indexing.coordinates
base_pointer_spec = [['spatialInner0']]
base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
......
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from os.path import dirname, isdir, join
import numpy as np
try:
import pycuda.driver as cuda
from pycuda import gpuarray
except Exception:
pass
def pow_two_divider(n):
if n == 0:
return 0
divider = 1
while (n & divider) == 0:
divider <<= 1
return divider
def ndarray_to_tex(tex_ref,
ndarray,
address_mode=None,
filter_mode=None,
use_normalized_coordinates=False,
read_as_integer=False):
if address_mode is None:
address_mode = cuda.address_mode.BORDER
if filter_mode is None:
filter_mode = cuda.filter_mode.LINEAR
if isinstance(ndarray, np.ndarray):
cu_array = cuda.np_to_array(ndarray, 'C')
elif isinstance(ndarray, gpuarray.GPUArray):
cu_array = cuda.gpuarray_to_array(ndarray, 'C')
else:
raise TypeError(
'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')
cuda.TextureReference.set_array(tex_ref, cu_array)
tex_ref.set_address_mode(0, address_mode)
if ndarray.ndim >= 2:
tex_ref.set_address_mode(1, address_mode)
if ndarray.ndim >= 3:
tex_ref.set_address_mode(2, address_mode)
tex_ref.set_filter_mode(filter_mode)
if not use_normalized_coordinates:
tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_NORMALIZED_COORDINATES)
if not read_as_integer:
tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)