Commit d1f332bd authored by Stephan Seitz's avatar Stephan Seitz
Browse files

Merge branch 'Extend_testsuit' into 'master'

Extend testsuit

See merge request !174
parents 9057081d 6654fa17
Pipeline #27274 passed with stages
in 28 minutes and 19 seconds
......@@ -93,6 +93,7 @@ minimal-conda:
- python setup.py quicktest
tags:
- docker
- cuda
minimal-sympy-master:
......@@ -107,6 +108,7 @@ minimal-sympy-master:
allow_failure: true
tags:
- docker
- cuda
pycodegen-integration:
......
......@@ -45,9 +45,11 @@ def generate_c(ast_node: Node,
create_kernel functions.
Args:
ast_node:
signature_only:
dialect: 'c' or 'cuda'
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: 'c', 'cuda' or opencl
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C-like code for the ast node and its descendants
"""
......
......@@ -10,17 +10,20 @@ with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
CUDA_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l}
def generate_cuda(astnode: Node, signature_only: bool = False) -> str:
def generate_cuda(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
"""Prints an abstract syntax tree node as CUDA code.
Args:
astnode: KernelFunction node to generate code for
signature_only: if True only the signature is printed
ast_node: ast representation of kernel
signature_only: generate signature without function body
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C-like code for the ast node and its descendants
CUDA code for the ast node and its descendants
"""
return generate_c(astnode, signature_only, dialect='cuda')
return generate_c(ast_node, signature_only, dialect='cuda',
custom_backend=custom_backend, with_globals=with_globals)
class CudaBackend(CBackend):
......
......@@ -12,22 +12,10 @@ import json
from pystencils.astnodes import NodeOrExpr
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
try:
import toml
except Exception:
class toml:
def dumps(self, *args):
raise ImportError('toml not installed')
def dump(self, *args):
raise ImportError('toml not installed')
try:
import yaml
except Exception:
class yaml:
def dumps(self, *args):
raise ImportError('pyyaml not installed')
except ImportError:
raise ImportError('yaml not installed')
def expr_to_dict(expr_or_node: NodeOrExpr, with_c_code=True, full_class_names=False):
......@@ -67,8 +55,8 @@ def print_json(expr_or_node: NodeOrExpr):
Returns:
str: JSON representation of AST
"""
dict = expr_to_dict(expr_or_node)
return json.dumps(dict, indent=4)
expr_or_node_dict = expr_to_dict(expr_or_node)
return json.dumps(expr_or_node_dict, indent=4)
def write_json(filename: str, expr_or_node: NodeOrExpr):
......@@ -78,28 +66,17 @@ def write_json(filename: str, expr_or_node: NodeOrExpr):
filename (str): Path for the file to write
expr_or_node (NodeOrExpr): a SymPy expression or a :class:`pystencils.astnodes.Node`
"""
dict = expr_to_dict(expr_or_node)
with open(filename, 'w') as f:
json.dump(dict, f, indent=4)
def print_toml(expr_or_node):
dict = expr_to_dict(expr_or_node, full_class_names=False)
return toml.dumps(dict)
def write_toml(filename, expr_or_node):
dict = expr_to_dict(expr_or_node)
expr_or_node_dict = expr_to_dict(expr_or_node)
with open(filename, 'w') as f:
toml.dump(dict, f)
json.dump(expr_or_node_dict, f, indent=4)
def print_yaml(expr_or_node):
dict = expr_to_dict(expr_or_node, full_class_names=False)
return yaml.dump(dict)
expr_or_node_dict = expr_to_dict(expr_or_node, full_class_names=False)
return yaml.dump(expr_or_node_dict)
def write_yaml(filename, expr_or_node):
dict = expr_to_dict(expr_or_node)
expr_or_node_dict = expr_to_dict(expr_or_node)
with open(filename, 'w') as f:
yaml.dump(dict, f)
yaml.dump(expr_or_node_dict, f)
......@@ -11,17 +11,20 @@ with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l}
def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
"""Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.
Args:
astnode: KernelFunction node to generate code for
signature_only: if True only the signature is printed
ast_node: ast representation of kernel
signature_only: generate signature without function body
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C-like code for the ast node and its descendants
OpenCL code for the ast node and its descendants
"""
return generate_c(astnode, signature_only, dialect='opencl')
return generate_c(ast_node, signature_only, dialect='opencl',
custom_backend=custom_backend, with_globals=with_globals)
class OpenClBackend(CudaBackend):
......
......@@ -13,26 +13,31 @@ class PyCudaArrayHandler:
import pycuda.autoinit # NOQA
def zeros(self, shape, dtype=np.float64, order='C'):
return gpuarray.zeros(shape, dtype, order)
cpu_array = np.zeros(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def ones(self, shape, dtype, order='C'):
return gpuarray.ones(shape, dtype, order)
def ones(self, shape, dtype=np.float64, order='C'):
cpu_array = np.ones(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
return self.to_gpu(cpu_array)
else:
return gpuarray.empty(shape, dtype)
def to_gpu(self, array):
@staticmethod
def to_gpu(array):
return gpuarray.to_gpu(array)
def upload(self, gpuarray, numpy_array):
gpuarray.set(numpy_array)
@staticmethod
def upload(array, numpy_array):
array.set(numpy_array)
def download(self, gpuarray, numpy_array):
gpuarray.get(numpy_array)
@staticmethod
def download(array, numpy_array):
array.get(numpy_array)
def randn(self, shape, dtype=np.float64):
cpu_array = np.random.randn(*shape).astype(dtype)
......
......@@ -19,15 +19,17 @@ class PyOpenClArrayHandler:
self.queue = queue
def zeros(self, shape, dtype=np.float64, order='C'):
return gpuarray.zeros(shape, dtype, order)
cpu_array = np.zeros(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def ones(self, shape, dtype, order='C'):
return gpuarray.ones(self.queue, shape, dtype, order)
def ones(self, shape, dtype=np.float64, order='C'):
cpu_array = np.ones(shape=shape, dtype=dtype, order=order)
return self.to_gpu(cpu_array)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
return self.from_numpy(cpu_array)
cpu_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
return self.to_gpu(cpu_array)
else:
return gpuarray.empty(self.queue, shape, dtype)
......
......@@ -126,7 +126,6 @@ class FiniteDifferenceStencilDerivation:
def isotropy_equations(self, order):
def cycle_int_sequence(sequence, modulus):
import numpy as np
result = []
arr = np.array(sequence, dtype=int)
while True:
......@@ -200,7 +199,7 @@ class FiniteDifferenceStencilDerivation:
via rotation and apply them to a field."""
dim = len(self.stencil[0])
assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available"
rotated_weights = np.rot90(np.array(self), 1, axes)
rotated_weights = np.rot90(np.array(self.__array__()), 1, axes)
result = []
max_offset = max(max(abs(e) for e in direction) for direction in self.stencil)
......
......@@ -23,9 +23,9 @@ def diffusion(scalar, diffusion_coeff, idx=None):
>>> f = Field.create_generic('f', spatial_dimensions=2)
>>> d = sp.Symbol("d")
>>> dx = sp.Symbol("dx")
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=sp.Symbol("d"))
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=d)
>>> discretization = Discretization2ndOrder()
>>> expected_output = ((f[-1, 0] + f[0, -1] - 4*f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
>>> expected_output = ((f[-1, 0] + f[0, -1] - 4 * f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
>>> sp.simplify(discretization(diffusion_term) - expected_output)
0
"""
......@@ -79,13 +79,6 @@ class Discretization2ndOrder:
self.dt = dt
self.spatial_stencil = discretization_stencil_func
@staticmethod
def _diff_order(e):
if not isinstance(e, Diff):
return 0
else:
return 1 + Discretization2ndOrder._diff_order(e.args[0])
def _discretize_diffusion(self, e):
result = 0
for c in range(e.dim):
......@@ -121,29 +114,6 @@ class Discretization2ndOrder:
new_args = [self._discretize_spatial(a) for a in e.args]
return e.func(*new_args) if new_args else e
def _discretize_diff(self, e):
order = self._diff_order(e)
if order == 1:
fa = e.args[0]
index = e.target
return (fa.neighbor(index, 1) - fa.neighbor(index, -1)) / (2 * self.dx)
elif order == 2:
indices = sorted([e.target, e.args[0].target])
fa = e.args[0].args[0]
if indices[0] == indices[1] and all(i >= 0 for i in indices):
result = (-2 * fa + fa.neighbor(indices[0], -1) + fa.neighbor(indices[0], +1))
elif indices[0] == indices[1]:
result = 0
for d in range(fa.field.spatial_dimensions):
result += (-2 * fa + fa.neighbor(d, -1) + fa.neighbor(d, +1))
else:
assert all(i >= 0 for i in indices)
offsets = [(1, 1), [-1, 1], [1, -1], [-1, -1]]
result = sum(o1 * o2 * fa.neighbor(indices[0], o1).neighbor(indices[1], o2) for o1, o2 in offsets) / 4
return result / (self.dx ** 2)
else:
raise NotImplementedError("Term contains derivatives of order > 2")
def __call__(self, expr):
if isinstance(expr, list):
return [self(e) for e in expr]
......
......@@ -7,6 +7,25 @@ from collections import defaultdict
from collections.abc import Iterable
def get_access_and_direction(term):
direction1 = term.args[1]
if isinstance(term.args[0], ps.Field.Access): # first derivative
access = term.args[0]
direction = (direction1,)
elif isinstance(term.args[0], ps.fd.Diff): # nested derivative
if isinstance(term.args[0].args[0], ps.fd.Diff): # third or higher derivative
raise ValueError("can only handle first and second derivatives")
elif not isinstance(term.args[0].args[0], ps.Field.Access):
raise ValueError("can only handle derivatives of field accesses")
access, direction2 = term.args[0].args[:2]
direction = (direction1, direction2)
else:
raise NotImplementedError(f"can only deal with derivatives of field accesses, "
f"but not {type(term.args[0])}; expansion of derivatives probably failed")
return access, direction
class FVM1stOrder:
"""Finite-volume discretization
......@@ -48,22 +67,7 @@ class FVM1stOrder:
avg = (term.get_shifted(*neighbor) + term) * sp.Rational(1, 2)
return avg
elif isinstance(term, ps.fd.Diff):
direction1 = term.args[1]
if isinstance(term.args[0], ps.Field.Access): # first derivative
access = term.args[0]
direction = (direction1,)
elif isinstance(term.args[0], ps.fd.Diff): # nested derivative
if isinstance(term.args[0].args[0], ps.fd.Diff): # third or higher derivative
raise ValueError("can only handle first and second derivatives")
elif not isinstance(term.args[0].args[0], ps.Field.Access):
raise ValueError("can only handle derivatives of field accesses")
access, direction2 = term.args[0].args[:2]
direction = (direction1, direction2)
else:
raise NotImplementedError("can only deal with derivatives of field accesses, "
"but not {}; expansion of derivatives probably failed"
.format(type(term.args[0])))
access, direction = get_access_and_direction(term)
fds = FDS(neighbor, access.field.spatial_dimensions, direction)
return fds.apply(access)
......@@ -103,22 +107,7 @@ class FVM1stOrder:
def discretize(term):
if isinstance(term, ps.fd.Diff):
direction1 = term.args[1]
if isinstance(term.args[0], ps.Field.Access): # first derivative
access = term.args[0]
direction = (direction1,)
elif isinstance(term.args[0], ps.fd.Diff): # nested derivative
if isinstance(term.args[0].args[0], ps.fd.Diff): # third or higher derivative
raise ValueError("can only handle first and second derivatives")
elif not isinstance(term.args[0].args[0], ps.Field.Access):
raise ValueError("can only handle derivatives of field accesses")
access, direction2 = term.args[0].args[:2]
direction = (direction1, direction2)
else:
raise NotImplementedError("can only deal with derivatives of field accesses, "
"but not {}; expansion of derivatives probably failed"
.format(type(term.args[0])))
access, direction = get_access_and_direction(term)
if self.dim == 2:
stencil = ["".join(a).replace(" ", "") for a in itertools.product("NS ", "EW ")
......
import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.backends.cbackend import get_headers
from pystencils.backends.cuda_backend import generate_cuda
from pystencils.data_types import StructType
from pystencils.field import FieldType
from pystencils.gpucuda.texture_utils import ndarray_to_tex
......@@ -45,7 +46,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
code = includes + "\n"
code += "#define FUNC_PREFIX __global__\n"
code += "#define RESTRICT __restrict__\n\n"
code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend))
code += str(generate_cuda(kernel_function_node, custom_backend=custom_backend))
textures = set(d.interpolator for d in kernel_function_node.atoms(
InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField))
......
......@@ -7,8 +7,6 @@
"""
"""
from os.path import dirname, isdir, join
from typing import Union
import numpy as np
......@@ -21,15 +19,6 @@ except Exception:
pass
def pow_two_divider(n):
if n == 0:
return 0
divider = 1
while (n & divider) == 0:
divider <<= 1
return divider
def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceReference]
ndarray,
address_mode=None,
......@@ -66,65 +55,3 @@ def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceRe
if not read_as_integer:
tex_ref.set_flags(tex_ref.get_flags() & ~cuda.TRSF_READ_AS_INTEGER)
def prefilter_for_cubic_bspline(gpuarray):
import pycuda.autoinit # NOQA
from pycuda.compiler import SourceModule
ndim = gpuarray.ndim
assert ndim == 2 or ndim == 3, "Only 2d or 3d supported"
assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
"Submodule CubicInterpolationCUDA does not exist"
nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
file_name = join(dirname(__file__), "CubicInterpolationCUDA", "code", "cubicPrefilter%iD.cu" % ndim)
with open(file_name) as file:
code = file.read()
mod = SourceModule(code, options=nvcc_options)
if ndim == 2:
height, width = gpuarray.shape
block = min(pow_two_divider(height), 64)
grid = height // block
func = mod.get_function('SamplesToCoefficients2DXf')
func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
for r in reversed(gpuarray.shape)),
block=(block, 1, 1),
grid=(grid, 1, 1))
block = min(pow_two_divider(width), 64)
grid = width // block
func = mod.get_function('SamplesToCoefficients2DYf')
func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
for r in reversed(gpuarray.shape)),
block=(block, 1, 1),
grid=(grid, 1, 1))
elif ndim == 3:
depth, height, width = gpuarray.shape
dimX = min(min(pow_two_divider(width), pow_two_divider(height)), 64)
dimY = min(min(pow_two_divider(depth), pow_two_divider(height)), 512 / dimX)
block = (dimX, dimY, 1)
dimGridX = (height // block[0], depth // block[1], 1)
dimGridY = (width // block[0], depth // block[1], 1)
dimGridZ = (width // block[0], height // block[1], 1)
func = mod.get_function("SamplesToCoefficients3DXf")
func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
for r in reversed(gpuarray.shape)),
block=block,
grid=dimGridX)
func = mod.get_function("SamplesToCoefficients3DYf")
func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
for r in reversed(gpuarray.shape)),
block=block,
grid=dimGridY)
func = mod.get_function("SamplesToCoefficients3DZf")
func(gpuarray, np.uint32(gpuarray.strides[-2]), *(np.uint32(r)
for r in reversed(gpuarray.shape)),
block=block,
grid=dimGridZ)
......@@ -7,55 +7,7 @@ from IPython.display import HTML
import pystencils.plot as plt
__all__ = ['log_progress', 'make_imshow_animation', 'display_animation', 'set_display_mode']
def log_progress(sequence, every=None, size=None, name='Items'):
"""Copied from https://github.com/alexanderkuk/log-progress"""
from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display
is_iterator = False
if size is None:
try:
size = len(sequence)
except TypeError:
is_iterator = True
if size is not None:
if every is None:
if size <= 200:
every = 1
else:
every = int(size / 200) # every 0.5%
else:
assert every is not None, 'sequence is iterator, set every'
if is_iterator:
progress = IntProgress(min=0, max=1, value=1)
progress.bar_style = 'info'
else:
progress = IntProgress(min=0, max=size, value=0)
label = HTML()
box = VBox(children=[label, progress])
display(box)
index = 0
try:
for index, record in enumerate(sequence, 1):
if index == 1 or index % every == 0:
if is_iterator:
label.value = f'{name}: {index} / ?'
else:
progress.value = index
label.value = f'{name}: {index} / {size}'
yield record
except:
progress.bar_style = 'danger'
raise
else:
progress.bar_style = 'success'
progress.value = index
label.value = f"{name}: {str(index or '?')}"
__all__ = ['make_imshow_animation', 'display_animation', 'set_display_mode']
VIDEO_TAG = """<video controls width="80%">
......
import numpy as np
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.backends.cbackend import get_headers
from pystencils.backends.opencl_backend import generate_opencl
from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper
......@@ -91,7 +92,7 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
code = includes + "\n"
code += "#define FUNC_PREFIX __kernel\n"
code += "#define RESTRICT restrict\n\n"
code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))
code += str(generate_opencl(kernel_function_node, custom_backend=custom_backend))
options = []
if USE_FAST_MATH:
options.append("-cl-unsafe-math-optimizations")
......
......@@ -6,6 +6,8 @@ import numpy as np
import pystencils as ps
from pystencils import create_data_handling, create_kernel
from pystencils.datahandling.pycuda import PyCudaArrayHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
try:
import pytest
......@@ -355,3 +357,36 @@ def test_load_data():
assert np.all(dh.cpu_arrays['src']) == 0
assert np.all(dh.cpu_arrays['dst']) == 0
assert np.all(dh.cpu_arrays['dst2']) == 0
@pytest.mark.parametrize('target', ('gpu', 'opencl'))
def test_array_handler(target):
size = (2, 2)
if target == 'gpu':
array_handler = PyCudaArrayHandler()
if target == 'opencl':
pytest.importorskip('pyopencl')
import pyopencl as cl
from pystencils.opencl.opencljit import init_globally
init_globally()
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
array_handler = PyOpenClArrayHandler(queue)
zero_array = array_handler.zeros(size)
cpu_array = np.empty(size)
array_handler.download(zero_array, cpu_array)
assert np.all(cpu_array) == 0
ones_array =