diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c49077bb27a2af158624e7222d999973487f0951..449e5e2227c63121d1aeb6c824423c4fbb4e54aa 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -65,16 +65,21 @@ minimal-windows: - python -c "import numpy" - python setup.py quicktest -minimal-ubuntu: +ubuntu: stage: test except: variables: - $ENABLE_NIGHTLY_BUILDS - image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_ubuntu + image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu script: - - python3 setup.py quicktest + - mkdir -p ~/.config/matplotlib + - echo "backend:template" > ~/.config/matplotlib/matplotlibrc + - sed -i 's/--doctest-modules //g' pytest.ini + - pytest-3 -v -m "not longrun" tags: - docker + - cuda + - AVX minimal-conda: stage: test diff --git a/conftest.py b/conftest.py index c20bde40916e7f7f5163c20e1c48c7b0f3d3b3a2..272f42303e620cd13fc7f26648bc03d437b1fda6 100644 --- a/conftest.py +++ b/conftest.py @@ -1,8 +1,8 @@ import os import runpy import sys -import warnings import tempfile +import warnings import nbformat import pytest @@ -34,7 +34,7 @@ def add_path_to_ignore(path): collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py"), - os.path.join(SCRIPT_FOLDER, "pystencils", "opencl", "opencl.autoinit")] + os.path.join(SCRIPT_FOLDER, "pystencils", "opencl", "opencl.autoinit")] add_path_to_ignore('pystencils_tests/benchmark') add_path_to_ignore('_local_tmp') @@ -44,7 +44,7 @@ collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/autodiff.py")] try: import pycuda except ImportError: - collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/pystencils_tests/test_cudagpu.py")] + collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_cudagpu.py")] add_path_to_ignore('pystencils/gpucuda') try: @@ -73,7 +73,22 @@ try: import blitzdb except ImportError: add_path_to_ignore('pystencils/runhelper') + collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_parameterstudy.py")] + +try: + import islpy +except ImportError: + collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/integer_set_analysis.py")] + +try: + import graphviz +except ImportError: + collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/backends/dot.py")] +try: + import pyevtk +except ImportError: + collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/datahandling/vtk.py")] collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')] @@ -83,8 +98,6 @@ for root, sub_dirs, files in os.walk('.'): collect_ignore.append(f) - - class IPythonMockup: def run_line_magic(self, *args, **kwargs): pass @@ -131,7 +144,7 @@ class IPyNbFile(pytest.File): exporter.exclude_markdown = True exporter.exclude_input_prompt = True - notebook_contents = self.fspath.open() + notebook_contents = self.fspath.open(encoding='utf-8') with warnings.catch_warnings(): warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated") diff --git a/doc/notebooks/01_tutorial_getting_started.ipynb b/doc/notebooks/01_tutorial_getting_started.ipynb index 97ff73da1cf3bb63e19fa6fac122499120d71cee..70b3c2a8a3e86d81fb8972ad50ffbdd0ad4bcb89 100644 --- a/doc/notebooks/01_tutorial_getting_started.ipynb +++ b/doc/notebooks/01_tutorial_getting_started.ipynb @@ -932,14 +932,18 @@ } ], "source": [ - "import requests\n", - "import imageio\n", - "from io import BytesIO\n", + "try:\n", + " import requests\n", + " import imageio\n", + " from io import BytesIO\n", "\n", - "response = requests.get(\"https://www.python.org/static/img/python-logo.png\")\n", - "img = imageio.imread(BytesIO(response.content)).astype(np.double)\n", - "img /= img.max()\n", - "plt.imshow(img);" + " response = requests.get(\"https://www.python.org/static/img/python-logo.png\")\n", + " img = imageio.imread(BytesIO(response.content)).astype(np.double)\n", + " img /= img.max()\n", + " plt.imshow(img);\n", + "except ImportError:\n", + " print(\"No requests installed\")\n", + " img = np.random.random((82, 290, 4))" ] }, { diff --git a/doc/notebooks/demo_plotting_and_animation.ipynb b/doc/notebooks/demo_plotting_and_animation.ipynb index 07c980788f7617b1bfb8bfad8a13f2f2d7b5c33d..13d658a9cb4980a78cc984caeeb78e220b543553 100644 --- a/doc/notebooks/demo_plotting_and_animation.ipynb +++ b/doc/notebooks/demo_plotting_and_animation.ipynb @@ -6,7 +6,9 @@ "metadata": {}, "outputs": [], "source": [ - "from pystencils.session import *" + "from pystencils.session import *\n", + "\n", + "import shutil" ] }, { @@ -408,9 +410,12 @@ } ], "source": [ - "plt.figure()\n", - "animation = plt.scalar_field_animation(run_func, frames=60)\n", - "ps.jupyter.display_as_html_video(animation)" + "if shutil.which(\"ffmpeg\") is not None:\n", + " plt.figure()\n", + " animation = plt.scalar_field_animation(run_func, frames=60)\n", + " ps.jupyter.display_as_html_video(animation)\n", + "else:\n", + " print(\"No ffmpeg installed\")" ] }, { @@ -474,8 +479,11 @@ } ], "source": [ - "animation = plt.surface_plot_animation(run_func, frames=60)\n", - "ps.jupyter.display_as_html_video(animation)" + "if shutil.which(\"ffmpeg\") is not None:\n", + " animation = plt.surface_plot_animation(run_func, frames=60)\n", + " ps.jupyter.display_as_html_video(animation)\n", + "else:\n", + " print(\"No ffmpeg installed\")" ] }, { @@ -636,9 +644,12 @@ } ], "source": [ - "plt.figure()\n", - "animation = plt.vector_field_animation(run_func, frames=60)\n", - "ps.jupyter.display_as_html_video(animation)" + "if shutil.which(\"ffmpeg\") is not None:\n", + " plt.figure()\n", + " animation = plt.vector_field_animation(run_func, frames=60)\n", + " ps.jupyter.display_as_html_video(animation)\n", + "else:\n", + " print(\"No ffmpeg installed\")" ] }, { @@ -671,8 +682,11 @@ } ], "source": [ - "animation = plt.vector_field_magnitude_animation(run_func, frames=60)\n", - "ps.jupyter.display_as_html_video(animation)" + "if shutil.which(\"ffmpeg\") is not None:\n", + " animation = plt.vector_field_magnitude_animation(run_func, frames=60)\n", + " ps.jupyter.display_as_html_video(animation)\n", + "else:\n", + " print(\"No ffmpeg installed\")" ] }, { diff --git a/doc/notebooks/demo_wave_equation.ipynb b/doc/notebooks/demo_wave_equation.ipynb index a4d5a9ff7c1cf5df33b83d3d48c3272f214a4d39..2aa083d96b33e6b2c69bee0783e1c55b4ab616fd 100644 --- a/doc/notebooks/demo_wave_equation.ipynb +++ b/doc/notebooks/demo_wave_equation.ipynb @@ -8,7 +8,9 @@ }, "outputs": [], "source": [ - "from pystencils.session import *" + "from pystencils.session import *\n", + "\n", + "import shutil" ] }, { @@ -397,8 +399,11 @@ } ], "source": [ - "ani = plt.surface_plot_animation(run, zlim=(-1, 1))\n", - "ps.jupyter.display_as_html_video(ani)" + "if shutil.which(\"ffmpeg\") is not None:\n", + " ani = plt.surface_plot_animation(run, zlim=(-1, 1))\n", + " ps.jupyter.display_as_html_video(ani)\n", + "else:\n", + " print(\"No ffmpeg installed\")" ] }, { @@ -466,9 +471,12 @@ " u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]\n", " return u_arrays[2]\n", " \n", - " ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))\n", " assert np.isfinite(np.max(u_arrays[2]))\n", - "ps.jupyter.display_as_html_video(ani)" + " if shutil.which(\"ffmpeg\") is not None:\n", + " ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))\n", + " ps.jupyter.display_as_html_video(ani)\n", + " else:\n", + " print(\"No ffmpeg installed\")" ] }, { diff --git a/pystencils/__init__.py b/pystencils/__init__.py index ed08ff1327c68a1a116edb21547a86f8b27a8c81..18679382f0014ba12844c72fa965a3ad4fe6e196 100644 --- a/pystencils/__init__.py +++ b/pystencils/__init__.py @@ -5,7 +5,7 @@ from . import stencil as stencil from .assignment import Assignment, assignment_from_stencil from .data_types import TypedSymbol from .datahandling import create_data_handling -from .display_utils import show_code, to_dot +from .display_utils import show_code, get_code_obj, get_code_str, to_dot from .field import Field, FieldType, fields from .kernel_decorator import kernel from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel @@ -25,7 +25,7 @@ __all__ = ['Field', 'FieldType', 'fields', 'TypedSymbol', 'make_slice', 'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel', - 'show_code', 'to_dot', + 'show_code', 'to_dot', 'get_code_obj', 'get_code_str', 'AssignmentCollection', 'Assignment', 'assignment_from_stencil', diff --git a/pystencils/assignment.py b/pystencils/assignment.py index 0bf68799491be29886d215d5ff76010c034bd174..92027f7773d2202765ab2ef09c5269654098060e 100644 --- a/pystencils/assignment.py +++ b/pystencils/assignment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np import sympy as sp from sympy.printing.latex import LatexPrinter @@ -24,9 +23,20 @@ def assignment_str(assignment): if Assignment: + _old_new = sp.codegen.ast.Assignment.__new__ + + def _Assignment__new__(cls, lhs, rhs, *args, **kwargs): + if isinstance(lhs, (list, tuple, sp.Matrix)) and isinstance(rhs, (list, tuple, sp.Matrix)): + assert len(lhs) == len(rhs), f'{lhs} and {rhs} must have same length when performing vector assignment!' + return tuple(_old_new(cls, a, b, *args, **kwargs) for a, b in zip(lhs, rhs)) + return _old_new(cls, lhs, rhs, *args, **kwargs) + Assignment.__str__ = assignment_str + Assignment.__new__ = _Assignment__new__ LatexPrinter._print_Assignment = print_assignment_latex + sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self)) + else: # back port for older sympy versions that don't have Assignment yet diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index a9c7c98f2a685112a249b03a7c2bd8be0afcd363..8ec80917af20aefc5883c68ac51f360005b4350b 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -110,10 +110,17 @@ class Conditional(Node): return result def __str__(self): - return 'if:({!s}) '.format(self.condition_expr) + return self.__repr__() def __repr__(self): - return 'if:({!r}) '.format(self.condition_expr) + repr = 'if:({!r}) '.format(self.condition_expr) + if self.true_block: + repr += '\n\t{}) '.format(self.true_block) + if self.false_block: + repr = 'else: '.format(self.false_block) + repr += '\n\t{} '.format(self.false_block) + + return repr def replace_by_true_block(self): """Replaces the conditional by its True block""" @@ -284,7 +291,10 @@ class Block(Node): self._nodes = nodes self.parent = None for n in self._nodes: - n.parent = self + try: + n.parent = self + except AttributeError: + pass @property def args(self): diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index cf58431309e1ddb0ba963fa808f9db5261448091..46d9e3a77e6a163acec4fbfcf1ddf0f9a15f8712 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -1,3 +1,4 @@ +import re from collections import namedtuple from typing import Set @@ -10,11 +11,12 @@ from sympy.printing.ccode import C89CodePrinter from pystencils.astnodes import KernelFunction, Node from pystencils.cpu.vectorization import vec_all, vec_any from pystencils.data_types import ( - PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func, - vector_memory_access) + PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, + reinterpret_cast_func, vector_memory_access) from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt from pystencils.integer_functions import ( - bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil) + bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, + int_div, int_power_of_2, modulo_ceil) try: from sympy.printing.ccode import C99CodePrinter as CCodePrinter @@ -23,6 +25,9 @@ except ImportError: __all__ = ['generate_c', 'CustomCodeNode', 'PrintNode', 'get_headers', 'CustomSympyPrinter'] + +HEADER_REGEX = re.compile(r'^[<"].*[">]$') + KERNCRAFT_NO_TERNARY_MODE = False @@ -91,7 +96,7 @@ def get_global_declarations(ast): visit_node(ast) - return sorted(set(global_declarations), key=lambda x: str(x)) + return sorted(set(global_declarations), key=str) def get_headers(ast_node: Node) -> Set[str]: @@ -111,6 +116,9 @@ def get_headers(ast_node: Node) -> Set[str]: if isinstance(g, Node): headers.update(get_headers(g)) + for h in headers: + assert HEADER_REGEX.match(h), f'header /{h}/ does not follow the pattern /"..."/ or /<...>/' + return sorted(headers) @@ -392,6 +400,13 @@ class CustomSympyPrinter(CCodePrinter): return self._print(expr.args[0]) elif isinstance(expr, fast_inv_sqrt): return "({})".format(self._print(1 / sp.sqrt(expr.args[0]))) + elif isinstance(expr, sp.Abs): + return "abs({})".format(self._print(expr.args[0])) + elif isinstance(expr, sp.Mod): + if expr.args[0].is_integer and expr.args[1].is_integer: + return "({} % {})".format(self._print(expr.args[0]), self._print(expr.args[1])) + else: + return "fmod({}, {})".format(self._print(expr.args[0]), self._print(expr.args[1])) elif expr.func in infix_functions: return "(%s %s %s)" % (self._print(expr.args[0]), infix_functions[expr.func], self._print(expr.args[1])) elif expr.func == int_power_of_2: diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py index 8bc58468901a006bc7ef8f278e3e7a544234de52..d590d65b4082e72745658f0b06eb152d64872944 100644 --- a/pystencils/backends/cuda_backend.py +++ b/pystencils/backends/cuda_backend.py @@ -3,7 +3,7 @@ from os.path import dirname, join from pystencils.astnodes import Node from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt -from pystencils.interpolation_astnodes import InterpolationMode +from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f: lines = f.readlines() @@ -70,10 +70,13 @@ class CudaSympyPrinter(CustomSympyPrinter): super(CudaSympyPrinter, self).__init__() self.known_functions.update(CUDA_KNOWN_FUNCTIONS) - def _print_TextureAccess(self, node): - dtype = node.texture.field.dtype.numpy_dtype + def _print_InterpolatorAccess(self, node): + dtype = node.interpolator.field.dtype.numpy_dtype - if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + if type(node) == DiffInterpolatorAccess: + # cubicTex3D_1st_derivative_x(texture tex, float3 coord) + template = f"cubicTex%iD_1st_derivative_{list(reversed('xyz'[:node.ndim]))[node.diff_coordinate_idx]}(%s, %s)" # noqa + elif node.interpolator.interpolation_mode == InterpolationMode.CUBIC_SPLINE: template = "cubicTex%iDSimple(%s, %s)" else: if dtype.itemsize > 4: @@ -84,8 +87,8 @@ class CudaSympyPrinter(CustomSympyPrinter): template = "tex%iD(%s, %s)" code = template % ( - node.texture.field.spatial_dimensions, - str(node.texture), + node.interpolator.field.spatial_dimensions, + str(node.interpolator), # + 0.5 comes from Nvidia's staggered indexing ', '.join(self._print(o + 0.5) for o in reversed(node.offsets)) ) diff --git a/pystencils/boundaries/boundaryhandling.py b/pystencils/boundaries/boundaryhandling.py index 94d12619663190104f80894efebdc0955d5ac592..0a33fde2e2cee7a2f8f96d59d119f272b89d5b6e 100644 --- a/pystencils/boundaries/boundaryhandling.py +++ b/pystencils/boundaries/boundaryhandling.py @@ -8,6 +8,8 @@ from pystencils.boundaries.createindexlist import ( create_boundary_index_array, numpy_data_type_for_boundary_object) from pystencils.cache import memorycache from pystencils.data_types import TypedSymbol, create_type +from pystencils.datahandling import ParallelDataHandling +from pystencils.datahandling.pycuda import PyCudaArrayHandler from pystencils.field import Field from pystencils.kernelparameters import FieldPointerSymbol @@ -96,16 +98,23 @@ class BoundaryHandling: def to_gpu(gpu_version, cpu_version): gpu_version = gpu_version.boundary_object_to_index_list cpu_version = cpu_version.boundary_object_to_index_list + + if ParallelDataHandling and isinstance(self.data_handling, ParallelDataHandling): + array_handler = PyCudaArrayHandler() + else: + array_handler = self.data_handling.array_handler + for obj, cpu_arr in cpu_version.items(): if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape: - gpu_version[obj] = self.data_handling.array_handler.to_gpu(cpu_arr) + gpu_version[obj] = array_handler.to_gpu(cpu_arr) else: - self.data_handling.array_handler.upload(gpu_version[obj], cpu_arr) + array_handler.upload(gpu_version[obj], cpu_arr) class_ = self.IndexFieldBlockData class_.to_cpu = to_cpu class_.to_gpu = to_gpu - data_handling.add_custom_class(self._index_array_name, class_) + gpu = self._target in data_handling._GPU_LIKE_TARGETS + data_handling.add_custom_class(self._index_array_name, class_, cpu=True, gpu=gpu) @property def data_handling(self): @@ -253,11 +262,13 @@ class BoundaryHandling: """ Writes a VTK field where each cell with the given boundary is marked with 1, other cells are 0 This can be used to display the simulation geometry in Paraview - :param file_name: vtk filename - :param boundaries: boundary object, or special string 'domain' for domain cells or special string 'all' for all - boundary conditions. - can also be a sequence, to write multiple boundaries to VTK file - :param ghost_layers: number of ghost layers to write, or True for all, False for none + + Params: + file_name: vtk filename + boundaries: boundary object, or special string 'domain' for domain cells or special string 'all' for all + boundary conditions. + can also be a sequence, to write multiple boundaries to VTK file + ghost_layers: number of ghost layers to write, or True for all, False for none """ if boundaries == 'all': boundaries = list(self._boundary_object_to_boundary_info.keys()) + ['domain'] @@ -329,7 +340,7 @@ class BoundaryHandling: self.kernel = kernel class IndexFieldBlockData: - def __init__(self): + def __init__(self, *args, **kwargs): self.boundary_object_to_index_list = {} self.boundary_object_to_data_setter = {} diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py index 6376ffb85bff4dca347e0dde7044a1d92ad7bab7..07a8a84d9c6d10ed8580a7ef83d6720b522cfd98 100644 --- a/pystencils/cpu/cpujit.py +++ b/pystencils/cpu/cpujit.py @@ -270,7 +270,8 @@ if( PyErr_Occurred() ) {{ return NULL; }} template_extract_complex = """ PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}"); if( obj_{name} == NULL) {{ PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }}; -{target_type} {name}{{ {extract_function_real}( obj_{name} ), {extract_function_imag}( obj_{name} ) }}; +{target_type} {name}{{ ({real_type}) {extract_function_real}( obj_{name} ), + ({real_type}) {extract_function_imag}( obj_{name} ) }}; if( PyErr_Occurred() ) {{ return NULL; }} """ @@ -409,6 +410,8 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True): pre_call_code += template_extract_complex.format(extract_function_real=extract_function[0], extract_function_imag=extract_function[1], target_type=target_type, + real_type="float" if target_type == "ComplexFloat" + else "double", name=param.symbol.name) else: pre_call_code += template_extract_scalar.format(extract_function=extract_function, diff --git a/pystencils/data_types.py b/pystencils/data_types.py index 5d553006bcca4c1cccbddb681d8710a92b75c437..786351f528573179543900bde760f675ddfe10a6 100644 --- a/pystencils/data_types.py +++ b/pystencils/data_types.py @@ -575,9 +575,7 @@ def get_type_of_expression(expr, raise NotImplementedError("Could not determine type for", expr, type(expr)) -class Type(sp.Basic): - is_Atom = True - +class Type(sp.Atom): def __new__(cls, *args, **kwargs): return sp.Basic.__new__(cls) diff --git a/pystencils/display_utils.py b/pystencils/display_utils.py index 610c404b709885e7445eba3be412f6811fca0241..cb09197bf175b43c4896072693ea11253976691f 100644 --- a/pystencils/display_utils.py +++ b/pystencils/display_utils.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import sympy as sp @@ -35,10 +35,10 @@ def highlight_cpp(code: str): return HTML(highlight(code, CppLexer(), HtmlFormatter())) -def show_code(ast: KernelFunction, custom_backend=None): +def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None): """Returns an object to display generated code (C/C++ or CUDA) - Can either be displayed as HTML in Jupyter notebooks or printed as normal string. + Can either be displayed as HTML in Jupyter notebooks or printed as normal string. """ from pystencils.backends.cbackend import generate_c @@ -65,3 +65,37 @@ def show_code(ast: KernelFunction, custom_backend=None): def __repr__(self): return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend) return CodeDisplay(ast) + + +def get_code_str(ast, custom_backend=None): + return str(get_code_obj(ast, custom_backend)) + + +def _isnotebook(): + try: + shell = get_ipython().__class__.__name__ + if shell == 'ZMQInteractiveShell': + return True # Jupyter notebook or qtconsole + elif shell == 'TerminalInteractiveShell': + return False # Terminal running IPython + else: + return False # Other type (?) + except NameError: + return False + + +def show_code(ast: Union[KernelFunction, KernelWrapper], custom_backend=None): + code = get_code_obj(ast, custom_backend) + + if _isnotebook(): + from IPython.display import display + display(code) + else: + try: + import rich.syntax + import rich.console + syntax = rich.syntax.Syntax(str(code), "c++", theme="monokai", line_numbers=True) + console = rich.console.Console() + console.print(syntax) + except ImportError: + print(code) diff --git a/pystencils/fd/derivation.py b/pystencils/fd/derivation.py index 4af4f3e3653e78668ad5b84e9ce9f19e7586b63c..2b67b89cf6ef0176ee3912960c8be5e859d733bc 100644 --- a/pystencils/fd/derivation.py +++ b/pystencils/fd/derivation.py @@ -1,6 +1,6 @@ +import itertools import warnings from collections import defaultdict -import itertools import numpy as np import sympy as sp @@ -184,6 +184,9 @@ class FiniteDifferenceStencilDerivation: result[max_offset - direction[1], max_offset + direction[0]] = weight return result + def __array__(self): + return np.array(self.as_array().tolist()) + def as_array(self): dim = len(self.stencil[0]) assert (dim == 2 or dim == 3), "Only 2D or 3D matrix representations are available" @@ -205,12 +208,12 @@ class FiniteDifferenceStencilDerivation: return result - def rotate_weights_and_apply(self, field_access: Field.Access, axis): + def rotate_weights_and_apply(self, field_access: Field.Access, axes): """derive gradient weights of other direction with already calculated weights of one direction via rotation and apply them to a field.""" dim = len(self.stencil[0]) assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available" - rotated_weights = np.rot90(np.array(self.as_array()).reshape(self.as_array().shape), 1, axis) + rotated_weights = np.rot90(np.array(self), 1, axes) result = [] max_offset = max(max(abs(e) for e in direction) for direction in self.stencil) diff --git a/pystencils/fd/derivative.py b/pystencils/fd/derivative.py index 7acd245059615ded27e2c1fe023e344b48c34a6e..31daf4294f4ffa70c766632c4126dd7403690251 100644 --- a/pystencils/fd/derivative.py +++ b/pystencils/fd/derivative.py @@ -111,6 +111,16 @@ class Diff(sp.Expr): def __str__(self): return "D(%s)" % self.arg + def interpolated_access(self, offset, **kwargs): + """Represents an interpolated access on a spatially differentiated field + + Args: + offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative + """ + from pystencils.interpolation_astnodes import DiffInterpolatorAccess + assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses" + return DiffInterpolatorAccess(self.arg.field.interpolated_access(offset, **kwargs).symbol, self.target, *offset) + class DiffOperator(sp.Expr): """Un-applied differential, i.e. differential operator @@ -306,7 +316,8 @@ def expand_diff_full(expr, functions=None, constants=None): functions.difference_update(constants) def visit(e): - e = e.expand() + if not isinstance(e, sp.Tuple): + e = e.expand() if e.func == Diff: result = 0 @@ -331,6 +342,9 @@ def expand_diff_full(expr, functions=None, constants=None): return result elif isinstance(e, sp.Piecewise): return sp.Piecewise(*((expand_diff_full(a, functions, constants), b) for a, b in e.args)) + elif isinstance(expr, sp.Tuple): + new_args = [visit(arg) for arg in e.args] + return sp.Tuple(*new_args) else: new_args = [visit(arg) for arg in e.args] return e.func(*new_args) if new_args else e @@ -370,6 +384,9 @@ def expand_diff_linear(expr, functions=None, constants=None): return diff.split_linear(functions) elif isinstance(expr, sp.Piecewise): return sp.Piecewise(*((expand_diff_linear(a, functions, constants), b) for a, b in expr.args)) + elif isinstance(expr, sp.Tuple): + new_args = [expand_diff_linear(e, functions) for e in expr.args] + return sp.Tuple(*new_args) else: new_args = [expand_diff_linear(e, functions) for e in expr.args] result = sp.expand(expr.func(*new_args) if new_args else expr) diff --git a/pystencils/fd/finitedifferences.py b/pystencils/fd/finitedifferences.py index d5bce66e96d8aa5d296df68906c8871d40c08bba..5b6b15f95bebec40939d54a8ff2ad6c58c169f58 100644 --- a/pystencils/fd/finitedifferences.py +++ b/pystencils/fd/finitedifferences.py @@ -109,7 +109,9 @@ class Discretization2ndOrder: return self._discretize_advection(e) elif isinstance(e, Diff): arg, *indices = diff_args(e) - if not isinstance(arg, Field.Access): + from pystencils.interpolation_astnodes import InterpolatorAccess + + if not isinstance(arg, (Field.Access, InterpolatorAccess)): raise ValueError("Only derivatives with field or field accesses as arguments can be discretized") return self.spatial_stencil(indices, self.dx, arg) else: diff --git a/pystencils/field.py b/pystencils/field.py index 0bf80b211348b6e5c1975a86f4d51fc50cb9fdd4..6b85dead11778bf5aa6a820d4f34e8b547a95df4 100644 --- a/pystencils/field.py +++ b/pystencils/field.py @@ -332,7 +332,7 @@ class Field(AbstractField): self.latex_name: Optional[str] = None self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple( 0 for _ in range(self.spatial_dimensions) - )) # type + )) self.coordinate_transform = sp.eye(self.spatial_dimensions) if field_type == FieldType.STAGGERED: assert self.staggered_stencil diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py index 28ec47d0ba22e92cd4064b3400a0d055a87be7cb..86f251c97b95f04c52aaa803398ddb78ba42a2c1 100644 --- a/pystencils/gpucuda/cudajit.py +++ b/pystencils/gpucuda/cudajit.py @@ -5,13 +5,20 @@ from pystencils.data_types import StructType from pystencils.field import FieldType from pystencils.gpucuda.texture_utils import ndarray_to_tex from pystencils.include import get_pycuda_include_path, get_pystencils_include_path -from pystencils.interpolation_astnodes import TextureAccess +from pystencils.interpolation_astnodes import InterpolatorAccess, TextureCachedField from pystencils.kernel_wrapper import KernelWrapper from pystencils.kernelparameters import FieldPointerSymbol USE_FAST_MATH = True +def get_cubic_interpolation_include_paths(): + from os.path import join, dirname + + return [join(dirname(__file__), "CubicInterpolationCUDA", "code"), + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None): """ Creates a kernel function from an abstract syntax tree which @@ -39,23 +46,28 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen code += "#define FUNC_PREFIX __global__\n" code += "#define RESTRICT __restrict__\n\n" code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend)) - textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess)) + textures = set(d.interpolator for d in kernel_function_node.atoms( + InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField)) nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"] if USE_FAST_MATH: nvcc_options.append("-use_fast_math") - # Code for - # if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures): - # assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \ - # "Submodule CubicInterpolationCUDA does not exist" - # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] - # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] - - # needed_dims = set(t.field.spatial_dimensions for t in textures - # if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE) - # for i in needed_dims: - # code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code + # Code for CubicInterpolationCUDA + from pystencils.interpolation_astnodes import InterpolationMode + from os.path import join, dirname, isdir + + if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures): + assert isdir(join(dirname(__file__), ("CubicInterpolationCUDA", "code")), + "Submodule CubicInterpolationCUDA does not exist.\n" + + "Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda") + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")] + nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")] + + needed_dims = set(t.field.spatial_dimensions for t in textures + if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE) + for i in needed_dims: + code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code mod = SourceModule(code, options=nvcc_options, include_dirs=[ get_pystencils_include_path(), get_pycuda_include_path()]) diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py index d29d862929530b27f840da445467a2bae44b1bfb..ff3db430fc841f4121b9602b57a1c1aa08ef86f9 100644 --- a/pystencils/gpucuda/texture_utils.py +++ b/pystencils/gpucuda/texture_utils.py @@ -15,6 +15,7 @@ import numpy as np try: import pycuda.driver as cuda from pycuda import gpuarray + import pycuda except Exception: pass @@ -35,6 +36,8 @@ def ndarray_to_tex(tex_ref, use_normalized_coordinates=False, read_as_integer=False): + if isinstance(address_mode, str): + address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) if address_mode is None: address_mode = cuda.address_mode.BORDER if filter_mode is None: diff --git a/pystencils/include/cuda_complex.hpp b/pystencils/include/cuda_complex.hpp index ad555264a87881d8eaee6b2476c482039d606f71..535aa52e30781a03e170298e8e5ebbed872585dc 100644 --- a/pystencils/include/cuda_complex.hpp +++ b/pystencils/include/cuda_complex.hpp @@ -1173,53 +1173,53 @@ operator<<(std::basic_ostream<_CharT, _Traits> &__os, const complex<_Tp> &__x) { template <class U, class V> CUDA_CALLABLE_MEMBER auto operator*(const complex<U> &complexNumber, const V &scalar) -> complex<U> { - return complex<U>{real(complexNumber) * scalar, imag(complexNumber) * scalar}; + return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator*(const V &scalar, const complex<U> &complexNumber) -> complex<U> { - return complex<U>{real(complexNumber) * scalar, imag(complexNumber) * scalar}; + return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator+(const complex<U> &complexNumber, const V &scalar) -> complex<U> { - return complex<U>{real(complexNumber) + scalar, imag(complexNumber)}; + return complex<U>(real(complexNumber) + scalar, imag(complexNumber)); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator+(const V &scalar, const complex<U> &complexNumber) -> complex<U> { - return complex<U>{real(complexNumber) + scalar, imag(complexNumber)}; + return complex<U>(real(complexNumber) + scalar, imag(complexNumber)); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator-(const complex<U> &complexNumber, const V &scalar) -> complex<U> { - return complex<U>{real(complexNumber) - scalar, imag(complexNumber)}; + return complex<U>(real(complexNumber) - scalar, imag(complexNumber)); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator-(const V &scalar, const complex<U> &complexNumber) -> complex<U> { - return complex<U>{scalar - real(complexNumber), imag(complexNumber)}; + return complex<U>(scalar - real(complexNumber), imag(complexNumber)); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator/(const complex<U> &complexNumber, const V scalar) -> complex<U> { - return complex<U>{real(complexNumber) / scalar, imag(complexNumber) / scalar}; + return complex<U>(real(complexNumber) / scalar, imag(complexNumber) / scalar); } template <class U, class V> CUDA_CALLABLE_MEMBER auto operator/(const V scalar, const complex<U> &complexNumber) -> complex<U> { - return complex<U>{scalar, 0} / complexNumber; + return complex<U>(scalar, 0) / complexNumber; } using ComplexDouble = complex<double>; diff --git a/pystencils/interpolation_astnodes.py b/pystencils/interpolation_astnodes.py index 62d291b08e634cc0ad57eb37dca7a779581d88e1..76bd340b7e2c9bd66d9c82f77bc0399208017e52 100644 --- a/pystencils/interpolation_astnodes.py +++ b/pystencils/interpolation_astnodes.py @@ -35,6 +35,27 @@ class InterpolationMode(str, Enum): CUBIC_SPLINE = "cubic_spline" +class _InterpolationSymbol(TypedSymbol): + + def __new__(cls, name, field, interpolator): + obj = cls.__xnew_cached_(cls, name, field, interpolator) + return obj + + def __new_stage2__(cls, name, field, interpolator): + obj = super().__xnew__(cls, name, 'dummy_symbol_carrying_field' + field.name) + obj.field = field + obj.interpolator = interpolator + return obj + + def __getnewargs__(self): + return self.name, self.field, self.interpolator + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + + class Interpolator(object): """ Implements non-integer accesses on fields using linear interpolation. @@ -77,24 +98,25 @@ class Interpolator(object): allow_textures=True): super().__init__() - self.field = parent_field.new_field_with_different_name(parent_field.name) + self.field = parent_field self.field.field_type = pystencils.field.FieldType.CUSTOM self.address_mode = address_mode self.use_normalized_coordinates = use_normalized_coordinates - hash_str = "%x" % abs(hash(self.field) + hash(address_mode)) - self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str, - 'dummy_symbol_carrying_field' + self.field.name + hash_str) - self.symbol.field = self.field - self.symbol.interpolator = self - self.allow_textures = allow_textures self.interpolation_mode = interpolation_mode + self.hash_str = hashlib.md5( + f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest() + self.symbol = _InterpolationSymbol(str(self), parent_field, self) + self.allow_textures = allow_textures + + @property + def ndim(self): + return self.field.ndim @property def _hashable_contents(self): return (str(self.address_mode), str(type(self)), - self.symbol, - self.address_mode, + self.hash_str, self.use_normalized_coordinates) def at(self, offset): @@ -112,6 +134,9 @@ class Interpolator(object): def __hash__(self): return hash(self._hashable_contents) + def __eq__(self, other): + return hash(self) == hash(other) + @property def reproducible_hash(self): return _hash(str(self._hashable_contents).encode()).hexdigest() @@ -142,29 +167,43 @@ class NearestNeightborInterpolator(Interpolator): class InterpolatorAccess(TypedSymbol): - def __new__(cls, field, offsets, *args, **kwargs): - obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs) + def __new__(cls, field, *offsets, **kwargs): + obj = InterpolatorAccess.__xnew_cached_(cls, field, *offsets, **kwargs) return obj - def __new_stage2__(self, symbol, *offsets): + def __new_stage2__(cls, symbol, *offsets): assert offsets is not None - obj = super().__xnew__(self, '%s_interpolator_%x' % - (symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype) + obj = super().__xnew__(cls, '%s_interpolator_%s' % + (symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()), + symbol.field.dtype) obj.offsets = offsets obj.symbol = symbol obj.field = symbol.field obj.interpolator = symbol.interpolator return obj - def __hash__(self): - return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator)) + def _hashable_contents(self): + return super()._hashable_content() + ((self.symbol, self.field, tuple(self.offsets), self.symbol.interpolator)) def __str__(self): - return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets)) + return '%s_interpolator(%s)' % (self.field.name, ', '.join(str(o) for o in self.offsets)) def __repr__(self): return self.__str__() + def _latex(self, printer, *_): + n = self.field.latex_name if self.field.latex_name else self.field.name + foo = ", ".join(str(printer.doprint(o)) for o in self.offsets) + return f'{n}_{{interpolator}}\\left({foo}\\right)' + + @property + def ndim(self): + return len(self.offsets) + + @property + def is_texture(self): + return isinstance(self.interpolator, TextureCachedField) + def atoms(self, *types): if self.offsets: offsets = set(o for o in self.offsets if isinstance(o, types)) @@ -177,6 +216,11 @@ class InterpolatorAccess(TypedSymbol): else: return set() + def neighbor(self, coord_id, offset): + offset_list = list(self.offsets) + offset_list[coord_id] += offset + return self.interpolator.at(tuple(offset_list)) + @property def free_symbols(self): symbols = set() @@ -189,6 +233,13 @@ class InterpolatorAccess(TypedSymbol): return symbols + @property + def required_global_declarations(self): + required_global_declarations = self.symbol.interpolator.required_global_declarations + if required_global_declarations: + required_global_declarations[0]._symbols_defined.add(self) + return required_global_declarations + @property def args(self): return [self.symbol, *self.offsets] @@ -201,14 +252,27 @@ class InterpolatorAccess(TypedSymbol): def interpolation_mode(self): return self.interpolator.interpolation_mode + @property + def _diff_interpolation_vec(self): + return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets) + for i in range(len(self.offsets))]) + + def diff(self, *symbols, **kwargs): + if symbols == (self,): + return 1 + rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs) + if rtn.shape == (1, 1): + rtn = rtn[0, 0] + return rtn + def implementation_with_stencils(self): field = self.field default_int_type = create_type('int64') - use_textures = isinstance(self, TextureAccess) + use_textures = isinstance(self.interpolator, TextureCachedField) if use_textures: def absolute_access(x, _): - return self.texture.at((o for o in x)) + return self.symbol.interpolator.at((o for o in x)) else: absolute_access = field.absolute_access @@ -255,7 +319,7 @@ class InterpolatorAccess(TypedSymbol): for (dim, i) in enumerate(index)] index = [cast_func(sp.Piecewise((i, i > 0), (sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)), - True)), default_int_type) + True)), default_int_type) for (dim, i) in enumerate(index)] sum[channel_idx] += weight * \ absolute_access(index, channel_idx if field.index_dimensions else ()) @@ -287,12 +351,61 @@ class InterpolatorAccess(TypedSymbol): # noinspection SpellCheckingInspection __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + def __getnewargs__(self): + return tuple(self.symbol, *self.offsets) + + +class DiffInterpolatorAccess(InterpolatorAccess): + def __new__(cls, symbol, diff_coordinate_idx, *offsets, **kwargs): + if symbol.interpolator.interpolation_mode == InterpolationMode.LINEAR: + from pystencils.fd import Diff, Discretization2ndOrder + return Discretization2ndOrder(1)(Diff(symbol.interpolator.at(offsets), diff_coordinate_idx)) + obj = DiffInterpolatorAccess.__xnew_cached_(cls, symbol, diff_coordinate_idx, *offsets, **kwargs) + return obj + + def __new_stage2__(self, symbol: sp.Symbol, diff_coordinate_idx, *offsets): + assert offsets is not None + obj = super().__xnew__(self, symbol, *offsets) + obj.diff_coordinate_idx = diff_coordinate_idx + return obj + + def __hash__(self): + return hash((self.symbol, self.field, self.diff_coordinate_idx, tuple(self.offsets), self.interpolator)) + + def __str__(self): + return '%s_diff%i_interpolator(%s)' % (self.field.name, self.diff_coordinate_idx, + ', '.join(str(o) for o in self.offsets)) + + def __repr__(self): + return str(self) + + @property + def args(self): + return [self.symbol, self.diff_coordinate_idx, *self.offsets] + + @property + def symbols_defined(self) -> Set[sp.Symbol]: + return {self} + + @property + def interpolation_mode(self): + return self.interpolator.interpolation_mode + + # noinspection SpellCheckingInspection + __xnew__ = staticmethod(__new_stage2__) + # noinspection SpellCheckingInspection + __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) + + def __getnewargs__(self): + return tuple(self.symbol, self.diff_coordinate_idx, *self.offsets) + + ########################################################################################## # GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols # ########################################################################################## -class TextureCachedField: +class TextureCachedField(Interpolator): def __init__(self, parent_field, address_mode=None, @@ -301,28 +414,19 @@ class TextureCachedField: use_normalized_coordinates=False, read_as_integer=False ): - if isinstance(address_mode, str): - address_mode = getattr(pycuda.driver.address_mode, address_mode.upper()) + super().__init__(parent_field, interpolation_mode, address_mode, use_normalized_coordinates) if address_mode is None: - address_mode = pycuda.driver.address_mode.BORDER + address_mode = 'border' if filter_mode is None: filter_mode = pycuda.driver.filter_mode.LINEAR - # self, field_name, field_type, dtype, layout, shape, strides - self.field = parent_field - self.address_mode = address_mode - self.filter_mode = filter_mode self.read_as_integer = read_as_integer - self.use_normalized_coordinates = use_normalized_coordinates - self.interpolation_mode = interpolation_mode - self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype) - self.symbol.interpolator = self - self.symbol.field = self.field self.required_global_declarations = [TextureDeclaration(self)] - # assert str(self.field.dtype) != 'double', "CUDA does not support double textures!" - # assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less" + @property + def ndim(self): + return self.field.ndim @classmethod def from_interpolator(cls, interpolator: LinearInterpolator): @@ -332,59 +436,17 @@ class TextureCachedField: obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode) return obj - def at(self, offset): - return TextureAccess(self.symbol, *offset) - - def __getitem__(self, offset): - return TextureAccess(self.symbol, *offset) - def __str__(self): return '%s_texture_%s' % (self.field.name, self.reproducible_hash) def __repr__(self): return self.__str__() - @property - def _hashable_contents(self): - return (type(self), - self.address_mode, - self.filter_mode, - self.read_as_integer, - self.interpolation_mode, - self.use_normalized_coordinates) - - def __hash__(self): - return hash(self._hashable_contents) - @property def reproducible_hash(self): return _hash(str(self._hashable_contents).encode()).hexdigest() -class TextureAccess(InterpolatorAccess): - def __new__(cls, texture_symbol, offsets, *args, **kwargs): - obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs) - return obj - - def __new_stage2__(self, symbol, *offsets): - obj = super().__xnew__(self, symbol, *offsets) - obj.required_global_declarations = symbol.interpolator.required_global_declarations - obj.required_global_declarations[0]._symbols_defined.add(obj) - return obj - - def __str__(self): - return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets)) - - @property - def texture(self): - return self.interpolator - - # noinspection SpellCheckingInspection - __xnew__ = staticmethod(__new_stage2__) - # noinspection SpellCheckingInspection - __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) - - class TextureDeclaration(Node): """ A global declaration of a texture. Visible both for device and host code. @@ -421,12 +483,18 @@ class TextureDeclaration(Node): @property def headers(self): - return ['"pycuda-helpers.hpp"'] + headers = ['"pycuda-helpers.hpp"'] + if self.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE: + headers.append('"cubicTex%iD.cu"' % self.texture.ndim) + return headers def __str__(self): from pystencils.backends.cuda_backend import CudaBackend return CudaBackend()(self) + def __repr__(self): + return str(self) + class TextureObject(TextureDeclaration): """ diff --git a/pystencils/kernel_wrapper.py b/pystencils/kernel_wrapper.py index 0e327711e5a355219cc2664ac9a6c8a02d88bc09..b76ec4e3d79c1b345c9f0a8b2bd8a97b59866b8d 100644 --- a/pystencils/kernel_wrapper.py +++ b/pystencils/kernel_wrapper.py @@ -1,11 +1,14 @@ -""" -Light-weight wrapper around a compiled kernel -""" import pystencils class KernelWrapper: - def __init__(self, kernel, parameters, ast_node): + """ + Light-weight wrapper around a compiled kernel. + + Can be called while still providing access to underlying AST. + """ + + def __init__(self, kernel, parameters, ast_node: pystencils.astnodes.KernelFunction): self.kernel = kernel self.parameters = parameters self.ast = ast_node @@ -16,4 +19,4 @@ class KernelWrapper: @property def code(self): - return str(pystencils.show_code(self.ast)) + return pystencils.get_code_str(self.ast) diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py index aaca6d3124c4303ae289e831fdb7b7680369c408..2c8aabaa530f985f7b645a41a29a9537a655fa07 100644 --- a/pystencils/kernelcreation.py +++ b/pystencils/kernelcreation.py @@ -79,14 +79,14 @@ def create_kernel(assignments, [0., 0., 0., 0., 0.]]) """ # ---- Normalizing parameters + if isinstance(assignments, Assignment): + assignments = [assignments] assert assignments, "Assignments must not be empty!" split_groups = () if isinstance(assignments, AssignmentCollection): if 'split_groups' in assignments.simplification_hints: split_groups = assignments.simplification_hints['split_groups'] assignments = assignments.all_assignments - if isinstance(assignments, Assignment): - assignments = [assignments] # ---- Creating ast if target == 'cpu': diff --git a/pystencils/math_optimizations.py b/pystencils/math_optimizations.py index ad0114782e15977fb329baacfb6ad4b6bac1e33b..b9420318f1a0a5b7a4aa538c35b6866672282bea 100644 --- a/pystencils/math_optimizations.py +++ b/pystencils/math_optimizations.py @@ -44,3 +44,13 @@ def optimize_assignments(assignments, optimizations): a.optimize(optimizations) return assignments + + +def optimize_ast(ast, optimizations): + + if HAS_REWRITING: + assignments_nodes = ast.atoms(SympyAssignment) + for a in assignments_nodes: + a.optimize(optimizations) + + return ast diff --git a/pystencils/simp/assignment_collection.py b/pystencils/simp/assignment_collection.py index 3a8bb2bd3b315e23fb9e11ef6385f264428c7bd5..706008c859bd0cdc9f2f3b892557242a5aa01184 100644 --- a/pystencils/simp/assignment_collection.py +++ b/pystencils/simp/assignment_collection.py @@ -1,3 +1,4 @@ +import itertools from copy import copy from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Union @@ -43,6 +44,11 @@ class AssignmentCollection: subexpressions = [Assignment(k, v) for k, v in subexpressions.items()] + main_assignments = list(itertools.chain.from_iterable( + [(a if isinstance(a, Iterable) else [a]) for a in main_assignments])) + subexpressions = list(itertools.chain.from_iterable( + [(a if isinstance(a, Iterable) else [a]) for a in subexpressions])) + self.main_assignments = main_assignments self.subexpressions = subexpressions @@ -150,6 +156,9 @@ class AssignmentCollection: """See :func:`count_operations` """ return count_operations(self.all_assignments, only_type=None) + def atoms(self, *args): + return set().union(*[a.atoms(*args) for a in self.all_assignments]) + def dependent_symbols(self, symbols: Iterable[sp.Symbol]) -> Set[sp.Symbol]: """Returns all symbols that depend on one of the passed symbols. diff --git a/pystencils/test_type_interference.py b/pystencils/test_type_interference.py index 0daa6f9d2a36948184d537809457b4aaf8001d29..953b87742304b2d629a1bd564fc23e0982d4f6d9 100644 --- a/pystencils/test_type_interference.py +++ b/pystencils/test_type_interference.py @@ -18,8 +18,7 @@ def test_type_interference(): ast = pystencils.create_kernel(assignments) - code = str(pystencils.show_code(ast)) - print(code) + code = str(pystencils.get_code_str(ast)) assert 'double a' in code assert 'uint16_t b' in code assert 'uint16_t f' in code diff --git a/pystencils/transformations.py b/pystencils/transformations.py index bb089ccb91ea5dfd678d69d6990981fdd28793fc..448991a842f2bd9cee6a47fc559fe16bbf8996ed 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -14,8 +14,8 @@ import pystencils.astnodes as ast import pystencils.integer_functions from pystencils.assignment import Assignment from pystencils.data_types import ( - PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type, - get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func) + PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, + get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func) from pystencils.field import AbstractField, Field, FieldType from pystencils.kernelparameters import FieldPointerSymbol from pystencils.simp.assignment_collection import AssignmentCollection @@ -1314,7 +1314,7 @@ def implement_interpolations(ast_node: ast.Node, implement_by_texture_accesses: bool = False, vectorize: bool = False, use_hardware_interpolation_for_f32=True): - from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField + from pystencils.interpolation_astnodes import (InterpolatorAccess, TextureCachedField) # TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields assert not(implement_by_texture_accesses and vectorize), \ @@ -1322,37 +1322,42 @@ def implement_interpolations(ast_node: ast.Node, FLOAT32_T = create_type('float32') interpolation_accesses = ast_node.atoms(InterpolatorAccess) + if not interpolation_accesses: + return ast_node def can_use_hw_interpolation(i): - return use_hardware_interpolation_for_f32 and i.dtype == FLOAT32_T and isinstance(i, TextureAccess) + return (use_hardware_interpolation_for_f32 + and implement_by_texture_accesses + and i.dtype == FLOAT32_T + and isinstance(i.symbol.interpolator, TextureCachedField)) if implement_by_texture_accesses: - interpolators = {a.symbol.interpolator for a in interpolation_accesses} - to_texture_map = {i: TextureCachedField.from_interpolator(i) for i in interpolators} + for i in interpolation_accesses: + from pystencils.interpolation_astnodes import _InterpolationSymbol - substitutions = {i: to_texture_map[i.symbol.interpolator].at( - [o for o in i.offsets]) for i in interpolation_accesses} - - try: - import pycuda.driver as cuda - for texture in substitutions.values(): - if can_use_hw_interpolation(texture): + try: + import pycuda.driver as cuda + texture = TextureCachedField.from_interpolator(i.interpolator) + if can_use_hw_interpolation(i): texture.filter_mode = cuda.filter_mode.LINEAR else: texture.filter_mode = cuda.filter_mode.POINT texture.read_as_integer = True - except Exception: - pass + except Exception as e: + raise e + i.symbol = _InterpolationSymbol(str(texture), i.symbol.field, texture) - if isinstance(ast_node, AssignmentCollection): - ast_node = ast_node.subs(substitutions) - else: - ast_node.subs(substitutions) + # from pystencils.math_optimizations import ReplaceOptim, optimize_ast - # Update after replacements - interpolation_accesses = ast_node.atoms(InterpolatorAccess) + # ImplementInterpolationByStencils = ReplaceOptim(lambda e: isinstance(e, InterpolatorAccess) + # and not can_use_hw_interpolation(i), + # lambda e: e.implementation_with_stencils() + # ) + # RemoveConjugate = ReplaceOptim(lambda e: isinstance(e, sp.conjugate), + # lambda e: e.args[0] + # ) if vectorize: # TODO can be done in _interpolator_access_to_stencils field.absolute_access == simd_gather raise NotImplementedError() diff --git a/pystencils_tests/test_address_of.py b/pystencils_tests/test_address_of.py index b4c46c1c6045520e30826fe84ac91ae76339ad1b..717f6e43720a4ddddfca8c7a3fce3ff9e15982ff 100644 --- a/pystencils_tests/test_address_of.py +++ b/pystencils_tests/test_address_of.py @@ -17,16 +17,14 @@ def test_address_of(): }, {}) ast = pystencils.create_kernel(assignments) - code = pystencils.show_code(ast) - print(code) + pystencils.show_code(ast) assignments = pystencils.AssignmentCollection({ y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) }, {}) ast = pystencils.create_kernel(assignments) - code = pystencils.show_code(ast) - print(code) + pystencils.show_code(ast) def test_address_of_with_cse(): @@ -39,9 +37,8 @@ def test_address_of_with_cse(): }, {}) ast = pystencils.create_kernel(assignments) - code = pystencils.show_code(ast) + pystencils.show_code(ast) assignments_cse = sympy_cse(assignments) ast = pystencils.create_kernel(assignments_cse) - code = pystencils.show_code(ast) - print(code) + pystencils.show_code(ast) diff --git a/pystencils_tests/test_assignment_collection.py b/pystencils_tests/test_assignment_collection.py index 42e3791ab331cc111343bfe410bc59e3f52ba80b..a16d51db44bb722ee1a9da9a20ad4192d4f6188e 100644 --- a/pystencils_tests/test_assignment_collection.py +++ b/pystencils_tests/test_assignment_collection.py @@ -1,3 +1,4 @@ +import pytest import sympy as sp from pystencils import Assignment, AssignmentCollection @@ -40,3 +41,39 @@ def test_free_and_defined_symbols(): print(ac) print(ac.__repr__) + + +def test_vector_assignments(): + """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)""" + + import pystencils as ps + import sympy as sp + a, b, c = sp.symbols("a b c") + assignments = ps.Assignment(sp.Matrix([a,b,c]), sp.Matrix([1,2,3])) + print(assignments) + + +def test_wrong_vector_assignments(): + """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)""" + + import pystencils as ps + import sympy as sp + a, b = sp.symbols("a b") + + with pytest.raises(AssertionError, + match=r'Matrix(.*) and Matrix(.*) must have same length when performing vector assignment!'): + ps.Assignment(sp.Matrix([a,b]), sp.Matrix([1,2,3])) + + +def test_vector_assignment_collection(): + """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)""" + + import pystencils as ps + import sympy as sp + a, b, c = sp.symbols("a b c") + y, x = sp.Matrix([a,b,c]), sp.Matrix([1,2,3]) + assignments = ps.AssignmentCollection({y: x}) + print(assignments) + + assignments = ps.AssignmentCollection([ps.Assignment(y,x)]) + print(assignments) diff --git a/pystencils_tests/test_boundary.py b/pystencils_tests/test_boundary.py index 096b1348fc59181e14eb15042d8db0098e71c521..23770c8ef6d61c15110f94875b0000c3e7d11fac 100644 --- a/pystencils_tests/test_boundary.py +++ b/pystencils_tests/test_boundary.py @@ -3,6 +3,8 @@ from tempfile import TemporaryDirectory import numpy as np +import pytest + from pystencils import Assignment, create_kernel from pystencils.boundaries import BoundaryHandling, Neumann, add_neumann_boundary from pystencils.datahandling import SerialDataHandling @@ -83,5 +85,6 @@ def test_kernel_vs_copy_boundary(): np.testing.assert_almost_equal(python_copy_result, handling_result) with TemporaryDirectory() as tmp_dir: + pytest.importorskip('pyevtk') boundary_handling.geometry_to_vtk(file_name=os.path.join(tmp_dir, 'test_output1'), ghost_layers=False) boundary_handling.geometry_to_vtk(file_name=os.path.join(tmp_dir, 'test_output2'), ghost_layers=True) diff --git a/pystencils_tests/test_buffer_gpu.py b/pystencils_tests/test_buffer_gpu.py index fc27a2331be1690b8bb1d0da043b47b3ba6fcfc9..f9ee96e9b19c147d95f006de4221980900375cb1 100644 --- a/pystencils_tests/test_buffer_gpu.py +++ b/pystencils_tests/test_buffer_gpu.py @@ -22,6 +22,7 @@ FIELD_SIZES = [(4, 3), (9, 3, 7)] def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'): + pytest.importorskip('pycuda') field_sizes = FIELD_SIZES if stencil_directions > 1: field_sizes = [s + (stencil_directions,) for s in field_sizes] @@ -44,7 +45,6 @@ def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'): return fields -@pytest.mark.gpu def test_full_scalar_field(): """Tests fully (un)packing a scalar field (from)to a GPU buffer.""" fields = _generate_fields() @@ -73,7 +73,6 @@ def test_full_scalar_field(): np.testing.assert_equal(src_arr, dst_arr) -@pytest.mark.gpu def test_field_slice(): """Tests (un)packing slices of a scalar field (from)to a buffer.""" fields = _generate_fields() @@ -109,7 +108,6 @@ def test_field_slice(): np.testing.assert_equal(src_arr[pack_slice], dst_arr[unpack_slice]) -@pytest.mark.gpu def test_all_cell_values(): """Tests (un)packing all cell values of the a field (from)to a buffer.""" num_cell_values = 7 @@ -148,7 +146,6 @@ def test_all_cell_values(): np.testing.assert_equal(src_arr, dst_arr) -@pytest.mark.gpu def test_subset_cell_values(): """Tests (un)packing a subset of cell values of the a field (from)to a buffer.""" num_cell_values = 7 @@ -190,7 +187,6 @@ def test_subset_cell_values(): np.testing.assert_equal(dst_arr, mask_arr.filled(int(0))) -@pytest.mark.gpu def test_field_layouts(): num_cell_values = 7 for layout_str in ['numpy', 'fzyx', 'zyxf', 'reverse_numpy']: diff --git a/pystencils_tests/test_complex_numbers.py b/pystencils_tests/test_complex_numbers.py index 9f9164640f50e417348ee635af620dd891249d77..41dc76a40411129c4ee68f0ee42028b57875d71e 100644 --- a/pystencils_tests/test_complex_numbers.py +++ b/pystencils_tests/test_complex_numbers.py @@ -52,11 +52,14 @@ def test_complex_numbers(assignment, scalar_dtypes, target): ast = pystencils.create_kernel(assignment, target=target, data_type=scalar_dtypes) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) print(code) assert "Not supported" not in code + if target == 'gpu': + pytest.importorskip('pycuda') + kernel = ast.compile() assert kernel is not None @@ -95,11 +98,14 @@ def test_complex_numbers_64(assignment, target): ast = pystencils.create_kernel(assignment, target=target, data_type='double') - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) print(code) assert "Not supported" not in code + if target == 'gpu': + pytest.importorskip('pycuda') + kernel = ast.compile() assert kernel is not None @@ -125,6 +131,7 @@ def test_complex_execution(dtype, target, with_complex_argument): }) if target == 'gpu': + pytest.importorskip('pycuda') from pycuda.gpuarray import zeros x_arr = zeros((20, 30), complex_dtype) y_arr = zeros((20, 30), complex_dtype) diff --git a/pystencils_tests/test_conditional_field_access.py b/pystencils_tests/test_conditional_field_access.py index f68b34679ae5de0996a09a30921c85ec49bece58..a4bd53228476ea49f977e08f71acfd1d596231fe 100644 --- a/pystencils_tests/test_conditional_field_access.py +++ b/pystencils_tests/test_conditional_field_access.py @@ -63,7 +63,7 @@ def test_boundary_check(with_cse): print(assignments) kernel_checked = ps.create_kernel(assignments, ghost_layers=0).compile() - print(ps.show_code(kernel_checked)) + ps.show_code(kernel_checked) # No SEGFAULT, please!! kernel_checked(f=f_arr, g=g_arr) diff --git a/pystencils_tests/test_cuda_known_functions.py b/pystencils_tests/test_cuda_known_functions.py index fe33b98df27cc6a6c2ec77eda49e704b9afbec5e..dcdabeaef6934e06b289351127b168d77d803be9 100644 --- a/pystencils_tests/test_cuda_known_functions.py +++ b/pystencils_tests/test_cuda_known_functions.py @@ -1,5 +1,7 @@ import sympy +import pytest + import pystencils from pystencils.astnodes import get_dummy_symbol from pystencils.backends.cuda_backend import CudaSympyPrinter @@ -18,7 +20,8 @@ def test_cuda_known_functions(): }) ast = pystencils.create_kernel(assignments, 'gpu') - print(pystencils.show_code(ast)) + pytest.importorskip('pycuda') + pystencils.show_code(ast) kernel = ast.compile() assert(kernel is not None) @@ -32,7 +35,7 @@ def test_cuda_but_not_c(): }) ast = pystencils.create_kernel(assignments, 'cpu') - print(pystencils.show_code(ast)) + pystencils.show_code(ast) def test_cuda_unknown(): @@ -43,5 +46,4 @@ def test_cuda_unknown(): }) ast = pystencils.create_kernel(assignments, 'gpu') - code = str(pystencils.show_code(ast)) - print(code) + pystencils.show_code(ast) diff --git a/pystencils_tests/test_custom_backends.py b/pystencils_tests/test_custom_backends.py index 1ff775d41355b79c1714a935125f27c261045011..6500076ce278bff4fc2a81f65c55553bd9523d78 100644 --- a/pystencils_tests/test_custom_backends.py +++ b/pystencils_tests/test_custom_backends.py @@ -1,9 +1,9 @@ from subprocess import CalledProcessError -import pycuda.driver import pytest import sympy +import pycuda.driver import pystencils import pystencils.cpu.cpujit import pystencils.gpucuda.cudajit @@ -25,18 +25,28 @@ class ScreamingGpuBackend(CudaBackend): return normal_code.upper() -def test_custom_backends(): +def test_custom_backends_cpu(): z, x, y = pystencils.fields("z, y, x: [2d]") normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment( z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], []) ast = pystencils.create_kernel(normal_assignments, target='cpu') - print(pystencils.show_code(ast, ScreamingBackend())) + pystencils.show_code(ast, ScreamingBackend()) with pytest.raises(CalledProcessError): pystencils.cpu.cpujit.make_python_function(ast, custom_backend=ScreamingBackend()) + +def test_custom_backends_gpu(): + pytest.importorskip('pycuda') + import pycuda.driver + + z, x, y = pystencils.fields("z, y, x: [2d]") + + normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment( + z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], []) + ast = pystencils.create_kernel(normal_assignments, target='gpu') - print(pystencils.show_code(ast, ScreamingGpuBackend())) + pystencils.show_code(ast, ScreamingGpuBackend()) with pytest.raises(pycuda.driver.CompileError): pystencils.gpucuda.cudajit.make_python_function(ast, custom_backend=ScreamingGpuBackend()) diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py index 1b79fdbec5aa8bbbfb90450d2014d058065e998b..7f95fe1f04d1bbf72770d2837bf6723681c1f50d 100644 --- a/pystencils_tests/test_datahandling.py +++ b/pystencils_tests/test_datahandling.py @@ -128,6 +128,7 @@ def kernel_execution_jacobi(dh, target): def vtk_output(dh): + pytest.importorskip('pyevtk') dh.add_array('scalar_field') dh.add_array('vector_field', values_per_cell=dh.dim) dh.add_array('multiple_scalar_field', values_per_cell=9) @@ -223,6 +224,7 @@ def test_kernel_param(target): def test_vtk_output(): + pytest.importorskip('pyevtk') for domain_shape in [(4, 5), (3, 4, 5)]: dh = create_data_handling(domain_size=domain_shape, periodicity=True) vtk_output(dh) diff --git a/pystencils_tests/test_fast_approximation.py b/pystencils_tests/test_fast_approximation.py index 76bef174eb7451234dcf347818fdb2d456ce7005..6c9539f64ee2a911123a8fd9f10845a3c67dcbea 100644 --- a/pystencils_tests/test_fast_approximation.py +++ b/pystencils_tests/test_fast_approximation.py @@ -12,7 +12,7 @@ def test_fast_sqrt(): assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1 assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1 ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu') - code_str = str(ps.show_code(ast)) + code_str = ps.get_code_str(ast) assert '__fsqrt_rn' in code_str expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0])) @@ -21,7 +21,7 @@ def test_fast_sqrt(): ac = ps.AssignmentCollection([expr], []) assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1 ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu') - code_str = str(ps.show_code(ast)) + code_str = ps.get_code_str(ast) assert '__frsqrt_rn' in code_str @@ -34,5 +34,5 @@ def test_fast_divisions(): assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1 ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_divisions(expr)), target='gpu') - code_str = str(ps.show_code(ast)) + code_str = ps.get_code_str(ast) assert '__fdividef' in code_str diff --git a/pystencils_tests/test_field.py b/pystencils_tests/test_field.py index a2813e34f9c72c8bbcd848d21993c181cf31fc43..253ca9f26547d78e6b23b2c6dbc582196b49960d 100644 --- a/pystencils_tests/test_field.py +++ b/pystencils_tests/test_field.py @@ -44,7 +44,7 @@ def test_error_handling(): Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype) assert 'index dimension' in str(e.value) - arr = np.array([[1, 2.0, 3], [1, 2.0, 3]], dtype=struct_dtype) + arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype) Field.create_from_numpy_array('f', arr, index_dimensions=0) with pytest.raises(ValueError) as e: Field.create_from_numpy_array('f', arr, index_dimensions=1) diff --git a/pystencils_tests/test_helpful_errors.py b/pystencils_tests/test_helpful_errors.py new file mode 100644 index 0000000000000000000000000000000000000000..a13178afebd424c0f3e5e3355f8b0ecf97eab870 --- /dev/null +++ b/pystencils_tests/test_helpful_errors.py @@ -0,0 +1,37 @@ +""" + +""" + +import pytest + +from pystencils.astnodes import Block +from pystencils.backends.cbackend import CustomCodeNode, get_headers + + +def test_headers_have_quotes_or_brackets(): + class ErrorNode1(CustomCodeNode): + + def __init__(self): + super().__init__("", [], []) + self.headers = ["iostream"] + + class ErrorNode2(CustomCodeNode): + headers = ["<iostream>", "foo"] + + def __init__(self): + super().__init__("", [], []) + self.headers = ["<iostream>", "foo"] + + class OkNode3(CustomCodeNode): + + def __init__(self): + super().__init__("", [], []) + self.headers = ["<iostream>", '"foo"'] + + with pytest.raises(AssertionError, match='.* does not follow the pattern .*'): + get_headers(Block([ErrorNode1()])) + + with pytest.raises(AssertionError, match='.* does not follow the pattern .*'): + get_headers(ErrorNode2()) + + get_headers(OkNode3()) diff --git a/pystencils_tests/test_interpolation.py b/pystencils_tests/test_interpolation.py index dd4ee06f4ba9197111a04af572df6cdaf2695ab5..477765bb31289dbfdc48927e3cc55f10d49f16a0 100644 --- a/pystencils_tests/test_interpolation.py +++ b/pystencils_tests/test_interpolation.py @@ -11,8 +11,6 @@ import itertools from os.path import dirname, join import numpy as np -import pycuda.autoinit # NOQA -import pycuda.gpuarray as gpuarray import pytest import sympy @@ -83,9 +81,9 @@ def test_scale_interpolation(): ['border', 'clamp', pytest.param('warp', marks=pytest.mark.xfail( - reason="Fails on newer SymPy version due to complex conjugate()")), + reason="requires interpolation-refactoring branch")), pytest.param('mirror', marks=pytest.mark.xfail( - reason="Fails on newer SymPy version due to complex conjugate()")), + reason="requires interpolation-refactoring branch")), ]) def test_rotate_interpolation(address_mode): """ @@ -110,87 +108,90 @@ def test_rotate_interpolation(address_mode): pyconrad.imshow(out, "out " + address_mode) -@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror']) -def test_rotate_interpolation_gpu(address_mode): +@pytest.mark.parametrize('dtype', (np.int32, np.float32, np.float64)) +@pytest.mark.parametrize('address_mode', ('border', 'wrap', 'clamp', 'mirror')) +@pytest.mark.parametrize('use_textures', ('use_textures', False)) +def test_rotate_interpolation_gpu(dtype, address_mode, use_textures): + pytest.importorskip('pycuda') + import pycuda.gpuarray as gpuarray + import pycuda.autoinit # noqa rotation_angle = sympy.pi / 5 scale = 1 - previous_result = None - for dtype in [np.int32, np.float32, np.float64]: - if dtype == np.int32: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna * 255, dtype)) - else: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna, dtype)) - for use_textures in [True, False]: - x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) - - transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * \ - sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2]) - assignments = pystencils.AssignmentCollection({ - y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) - }) - print(assignments) - ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) - print(ast) - print(pystencils.show_code(ast)) - kernel = ast.compile() - - out = gpuarray.zeros_like(lenna_gpu) - kernel(x=lenna_gpu, y=out) - pyconrad.imshow(out, - f"out {address_mode} texture:{use_textures} {type_map[dtype]}") - skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", - np.ascontiguousarray(out.get(), np.float32)) - if previous_result is not None: - try: - assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3) - except AssertionError: # NOQA - print("Max error: %f" % np.max(previous_result - out.get())) - # pyconrad.imshow(previous_result - out.get(), "Difference image") - # raise e - previous_result = out.get() - - -@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror']) -def test_shift_interpolation_gpu(address_mode): + if dtype == np.int32: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna * 255, dtype)) + else: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna, dtype)) + x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) + + transformed = scale * \ + sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2]) + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + print(assignments) + ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) + print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = gpuarray.zeros_like(lenna_gpu) + kernel(x=lenna_gpu, y=out) + pyconrad.imshow(out, + f"out {address_mode} texture:{use_textures} {type_map[dtype]}") + + +@pytest.mark.parametrize('address_mode', ['border', 'wrap', + pytest.param('warp', marks=pytest.mark.xfail( + reason="% printed as fmod on old sympy")), + pytest.param('mirror', marks=pytest.mark.xfail( + reason="% printed as fmod on old sympy")), + ]) +@pytest.mark.parametrize('dtype', [np.float64, np.float32, np.int32]) +@pytest.mark.parametrize('use_textures', ('use_textures', False,)) +def test_shift_interpolation_gpu(address_mode, dtype, use_textures): + sver = sympy.__version__.split(".") + if (int(sver[0]) == 1 and int(sver[1]) < 2) and address_mode in ['mirror', 'warp']: + pytest.skip() + pytest.importorskip('pycuda') + + import pycuda.gpuarray as gpuarray + import pycuda.autoinit # noqa rotation_angle = 0 # sympy.pi / 5 scale = 1 # shift = - sympy.Matrix([1.5, 1.5]) shift = sympy.Matrix((0.0, 0.0)) - for dtype in [np.float64, np.float32, np.int32]: - if dtype == np.int32: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna * 255, dtype)) - else: - lenna_gpu = gpuarray.to_gpu( - np.ascontiguousarray(lenna, dtype)) - for use_textures in [True, False]: - x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) - - if use_textures: - transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift - else: - transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift - assignments = pystencils.AssignmentCollection({ - y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) - }) - # print(assignments) - ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) - # print(ast) - print(pystencils.show_code(ast)) - kernel = ast.compile() - - out = gpuarray.zeros_like(lenna_gpu) - kernel(x=lenna_gpu, y=out) - pyconrad.imshow(out, - f"out {address_mode} texture:{use_textures} {type_map[dtype]}") - skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif", - np.ascontiguousarray(out.get(), np.float32)) + if dtype == np.int32: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna * 255, dtype)) + else: + lenna_gpu = gpuarray.to_gpu( + np.ascontiguousarray(lenna, dtype)) + + x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0) + + if use_textures: + transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift + else: + transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift + assignments = pystencils.AssignmentCollection({ + y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed) + }) + # print(assignments) + ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures) + # print(ast) + print(pystencils.show_code(ast)) + kernel = ast.compile() + + out = gpuarray.zeros_like(lenna_gpu) + kernel(x=lenna_gpu, y=out) + pyconrad.imshow(out, + f"out {address_mode} texture:{use_textures} {type_map[dtype]}") @pytest.mark.parametrize('address_mode', ['border', 'clamp']) @@ -218,7 +219,7 @@ def test_rotate_interpolation_size_change(address_mode): @pytest.mark.parametrize('address_mode, target', - itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu', 'gpu'])) + itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu'])) def test_field_interpolated(address_mode, target): x_f, y_f = pystencils.fields('x,y: float64 [2d]') @@ -226,7 +227,7 @@ def test_field_interpolated(address_mode, target): y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode) }) print(assignments) - ast = pystencils.create_kernel(assignments) + ast = pystencils.create_kernel(assignments, target=target) print(ast) print(pystencils.show_code(ast)) kernel = ast.compile() @@ -234,5 +235,11 @@ def test_field_interpolated(address_mode, target): out = np.zeros_like(lenna) kernel(x=lenna, y=out) pyconrad.imshow(out, "out " + address_mode) - kernel(x=lenna, y=out) - pyconrad.imshow(out, "out " + address_mode) + + +def test_spatial_derivative(): + x, y = pystencils.fields('x, y: float32[2d]') + tx, ty = pystencils.fields('t_x, t_y: float32[2d]') + + diff = sympy.diff(x.interpolated_access((tx.center, ty.center)), tx.center) + print("diff: " + str(diff)) diff --git a/pystencils_tests/test_jacobi_cbackend.py b/pystencils_tests/test_jacobi_cbackend.py index 9206aeedaf4bb035444f9280cf58968c9ca385c9..6d86ecb05f920f7a6c54b018ea5dd3508172b727 100644 --- a/pystencils_tests/test_jacobi_cbackend.py +++ b/pystencils_tests/test_jacobi_cbackend.py @@ -1,6 +1,6 @@ import numpy as np -from pystencils import show_code +from pystencils import get_code_obj from pystencils.astnodes import Block, KernelFunction, SympyAssignment from pystencils.cpu import make_python_function from pystencils.field import Field @@ -36,7 +36,7 @@ def test_jacobi_fixed_field_size(): error = np.sum(np.abs(dst_field_py - dst_field_c)) np.testing.assert_allclose(error, 0.0, atol=1e-13) - code_display = show_code(ast_node) + code_display = get_code_obj(ast_node) assert 'for' in str(code_display) assert 'for' in code_display._repr_html_() diff --git a/pystencils_tests/test_jacobi_llvm.py b/pystencils_tests/test_jacobi_llvm.py index 13b22aa4b0235bb98b4789e3b9266bf672985111..95bd52d2edd706fc3e8b6c595c7ad9855ae5e4f6 100644 --- a/pystencils_tests/test_jacobi_llvm.py +++ b/pystencils_tests/test_jacobi_llvm.py @@ -52,7 +52,7 @@ def test_jacobi_fixed_field_size_gpu(): jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4) ast = create_kernel([jacobi], target='gpu') - print(show_code(ast)) + show_code(ast) for x in range(1, size[0] - 1): for y in range(1, size[1] - 1): diff --git a/pystencils_tests/test_loop_cutting.py b/pystencils_tests/test_loop_cutting.py index cd89f37f6f365b4223e1463db68874f50e81c46d..0291074b87c051bcf381883e474078c663274ac1 100644 --- a/pystencils_tests/test_loop_cutting.py +++ b/pystencils_tests/test_loop_cutting.py @@ -1,6 +1,8 @@ import numpy as np import sympy as sp +import pytest + import pystencils as ps import pystencils.astnodes as ast from pystencils.field import Field, FieldType @@ -57,6 +59,7 @@ def test_staggered_iteration(): sum(f[o] for o in offsets_in_plane(d, -1, dim))) assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)] func_optimized = create_staggered_kernel(assignments).compile() + pytest.importorskip('islpy') assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work" func(f=f_arr, s=s_arr_ref) @@ -99,6 +102,7 @@ def test_staggered_iteration_manual(): move_constants_before_loop(kernel_ast.body) cleanup_blocks(kernel_ast.body) + pytest.importorskip('islpy') assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work" func_optimized = make_python_function(kernel_ast) diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py index ea5b1ecbc83f21672731b2ecbcb54c78faa1603b..6c013cefed7387827888385feeb11352b96cc8e6 100644 --- a/pystencils_tests/test_opencl.py +++ b/pystencils_tests/test_opencl.py @@ -5,11 +5,13 @@ import sympy as sp import pystencils from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.opencl_backend import OpenClBackend -from pystencils.opencl.opencljit import get_global_cl_queue, init_globally, make_python_function +from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function try: import pyopencl as cl HAS_OPENCL = True + import pystencils.opencl.autoinit + except Exception: HAS_OPENCL = False @@ -27,10 +29,9 @@ def test_print_opencl(): print(ast) - code = pystencils.show_code(ast, custom_backend=CudaBackend()) - print(code) + pystencils.show_code(ast, custom_backend=CudaBackend()) - opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend()) + opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend()) print(opencl_code) assert "__global double * RESTRICT const _data_x" in str(opencl_code) @@ -108,10 +109,9 @@ def test_opencl_jit(): print(ast) - code = pystencils.show_code(ast, custom_backend=CudaBackend()) - print(code) - opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend()) - print(opencl_code) + pystencils.show_code(ast, custom_backend=CudaBackend()) + + pystencils.show_code(ast, custom_backend=OpenClBackend()) cuda_kernel = ast.compile() assert cuda_kernel is not None @@ -246,14 +246,12 @@ def test_kernel_creation(): print(assignments) - pystencils.opencl.clear_global_ctx() - import pystencils.opencl.autoinit ast = pystencils.create_kernel(assignments, target='opencl') print(ast.backend) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) print(code) assert 'get_local_size' in code diff --git a/pystencils_tests/test_phasefield_dentritic_3D.ipynb b/pystencils_tests/test_phasefield_dentritic_3D.ipynb index aebdeefdddf53e9736feb7d8673c09bc5d8fa6d0..1a01f103891fb229385d8192f263fb53dd837a78 100644 --- a/pystencils_tests/test_phasefield_dentritic_3D.ipynb +++ b/pystencils_tests/test_phasefield_dentritic_3D.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pytest\n", + "pytest.importorskip('pycuda')" + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/pystencils_tests/test_plot.py b/pystencils_tests/test_plot.py index 6234334bb91352c80c643c474c76c15b968ce1c7..4c9207216266e48d4f2b0b14db1a5e160732c5cb 100644 --- a/pystencils_tests/test_plot.py +++ b/pystencils_tests/test_plot.py @@ -1,5 +1,8 @@ import os from tempfile import TemporaryDirectory +import shutil + +import pytest import numpy as np @@ -20,6 +23,7 @@ def example_vector_field(t=0, shape=(40, 40)): return result +@pytest.mark.skipif(shutil.which('ffmpeg') is None, reason="ffmpeg not available") def test_animation(): t = 0 diff --git a/pystencils_tests/test_print_infinity.py b/pystencils_tests/test_print_infinity.py index 4b1e488822163e684dcad4009fca1b72c7d87d2c..9e2dbd29b18389265b206513ec6c4625e0722a10 100644 --- a/pystencils_tests/test_print_infinity.py +++ b/pystencils_tests/test_print_infinity.py @@ -17,6 +17,9 @@ def test_print_infinity(type, negative, target): assignment = pystencils.Assignment(x.center, oo) ast = pystencils.create_kernel(assignment, data_type=type, target=target) + if target == 'gpu': + pytest.importorskip('pycuda') + ast.compile() print(ast.compile().code) diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py index 473aa3d1215449f51e70aa3b09f2c605ed13c313..1b3f89f2f81aa6e079f7bc030f27da74e778dd13 100644 --- a/pystencils_tests/test_random.py +++ b/pystencils_tests/test_random.py @@ -1,5 +1,7 @@ import numpy as np +import pytest + import pystencils as ps from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles @@ -12,6 +14,9 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302], def test_philox_double(): for target in ('cpu', 'gpu'): + if target == 'gpu': + pytest.importorskip('pycuda') + dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target) f = dh.add_array("f", values_per_cell=2) @@ -39,6 +44,9 @@ def test_philox_double(): def test_philox_float(): for target in ('cpu', 'gpu'): + if target == 'gpu': + pytest.importorskip('pycuda') + dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target) f = dh.add_array("f", values_per_cell=4) diff --git a/pystencils_tests/test_source_code_comment.py b/pystencils_tests/test_source_code_comment.py index 6cc66feb6f7f92174029e274feee22dfc4455b8d..16e6e5a9647843db4483b951aa631b591240dac5 100644 --- a/pystencils_tests/test_source_code_comment.py +++ b/pystencils_tests/test_source_code_comment.py @@ -27,4 +27,5 @@ def test_source_code_comment(): print(ast) compiled = ast.compile() assert compiled is not None - print(pystencils.show_code(ast)) + + pystencils.show_code(ast) diff --git a/pystencils_tests/test_staggered_kernel.py b/pystencils_tests/test_staggered_kernel.py index 310220d21442dc1d1a24b0e74e50f3e068374b67..c4f491393d2b4d08fbb9b4c99356abdc7e7199b6 100644 --- a/pystencils_tests/test_staggered_kernel.py +++ b/pystencils_tests/test_staggered_kernel.py @@ -1,6 +1,8 @@ import numpy as np import sympy as sp +import pytest + import pystencils as ps @@ -88,6 +90,7 @@ def test_staggered_subexpressions(): def test_staggered_loop_cutting(): + pytest.importorskip('islpy') dh = ps.create_data_handling((4, 4), periodicity=True, default_target='cpu') j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED) assignments = [ps.Assignment(j.staggered_access("SW"), 1)] diff --git a/pystencils_tests/test_sum_prod.py b/pystencils_tests/test_sum_prod.py index 4fa5c0618612b013edda4d164dd035dafdd2438a..4b4cd7131ad4b860c754865cd35f3960399e9b5a 100644 --- a/pystencils_tests/test_sum_prod.py +++ b/pystencils_tests/test_sum_prod.py @@ -30,7 +30,7 @@ def test_sum(): }) ast = pystencils.create_kernel(assignments) - code = str(pystencils.show_code(ast)) + code = str(pystencils.get_code_obj(ast)) kernel = ast.compile() print(code) @@ -58,11 +58,11 @@ def test_sum_use_float(): }) ast = pystencils.create_kernel(assignments, data_type=create_type('float32')) - code = str(pystencils.show_code(ast)) + code = str(pystencils.get_code_obj(ast)) kernel = ast.compile() print(code) - print(pystencils.show_code(ast)) + print(pystencils.get_code_obj(ast)) assert 'float sum' in code array = np.zeros((10,), np.float32) @@ -89,7 +89,7 @@ def test_product(): }) ast = pystencils.create_kernel(assignments) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) kernel = ast.compile() print(code) @@ -120,11 +120,9 @@ def test_prod_var_limit(): }) ast = pystencils.create_kernel(assignments) - code = str(pystencils.show_code(ast)) + pystencils.show_code(ast) kernel = ast.compile() - print(code) - array = np.zeros((10,), np.int64) kernel(x=array, limit=100) diff --git a/pystencils_tests/test_sympy_optimizations.py b/pystencils_tests/test_sympy_optimizations.py index 745b936ea7d0e3ce5da6c684dee42c56c09dd835..b22ad9d7fb07c84b88388455b45e76f6b5decc3a 100644 --- a/pystencils_tests/test_sympy_optimizations.py +++ b/pystencils_tests/test_sympy_optimizations.py @@ -16,9 +16,10 @@ def test_sympy_optimizations(): }) assignments = optimize_assignments(assignments, optims_pystencils_cpu) + print(assignments) ast = pystencils.create_kernel(assignments, target=target) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) assert 'expm1(' in code @@ -35,7 +36,7 @@ def test_evaluate_constant_terms(): assignments = optimize_assignments(assignments, optims_pystencils_cpu) ast = pystencils.create_kernel(assignments, target=target) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) assert 'cos(' not in code print(code) @@ -55,6 +56,6 @@ def test_do_not_evaluate_constant_terms(): optimize_assignments(assignments, optimizations) ast = pystencils.create_kernel(assignments, target=target) - code = str(pystencils.show_code(ast)) + code = pystencils.get_code_str(ast) assert 'cos(' in code print(code) diff --git a/pytest.ini b/pytest.ini index 4e9f5caddfef20639da974ad9327e53fa8b6ced4..0070425966626449378fcd1ff9eabe468256a112 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,7 +3,6 @@ python_files = test_*.py *_test.py scenario_*.py norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov addopts = --doctest-modules --durations=20 --cov-config pytest.ini markers = - gpu: test that require a gpu kerncraft: tests depending on kerncraft [run] diff --git a/setup.py b/setup.py index 83a76f870f02f64d8a371f294c1224c15c919061..8eeeff7b2e4a324feb7dc31fc9b8605254ae2da0 100644 --- a/setup.py +++ b/setup.py @@ -112,7 +112,7 @@ setup(name='pystencils', 'opencl': ['pyopencl'], 'alltrafos': ['islpy', 'py-cpuinfo'], 'bench_db': ['blitzdb', 'pymongo', 'pandas'], - 'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk'], + 'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich'], 'autodiff': ['pystencils-autodiff'], 'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx', 'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],