Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 1754 additions and 17 deletions
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import sympy
import pystencils
from pystencils.backends.json import print_json, print_yaml, write_json, write_yaml
import tempfile
def test_json_backend():
z, y, x = pystencils.fields("z, y, x: [20,40]")
a = sympy.Symbol('a')
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * a * x[0, 0] * y[0, 0]
})
ast = pystencils.create_kernel(assignments)
pj = print_json(ast)
# print(pj)
py = print_yaml(ast)
# print(py)
temp_dir = tempfile.TemporaryDirectory()
write_json(temp_dir.name + '/test.json', ast)
write_yaml(temp_dir.name + '/test.yaml', ast)
"""
Test the pystencils-specific JSON encoder and serializer as used in the Database class.
"""
import numpy as np
import tempfile
from pystencils.config import CreateKernelConfig
from pystencils import Target, Field
from pystencils.runhelper.db import Database, PystencilsJsonSerializer
def test_json_serializer():
dtype = np.float32
index_arr = np.zeros((3,), dtype=dtype)
indexed_field = Field.create_from_numpy_array('index', index_arr)
# create pystencils config
config = CreateKernelConfig(target=Target.CPU, function_name='dummy_config', data_type=dtype,
index_fields=[indexed_field])
# create dummy database
temp_dir = tempfile.TemporaryDirectory()
db = Database(file=temp_dir.name, serializer_info=('pystencils_serializer', PystencilsJsonSerializer))
db.save(params={'config': config}, result={'test': 'dummy'})
%% Cell type:code id: tags:
``` python
from pystencils.session import *
```
%% Cell type:code id: tags:
``` python
dh = ps.create_data_handling(domain_size=(256, 256), periodicity=True)
c_field = dh.add_array('c')
dh.fill("c", 0.0, ghost_layers=True)
```
%% Cell type:code id: tags:
``` python
for x in range(129):
for y in range(258):
dh.cpu_arrays['c'][x, y] = 1.0
```
%% Cell type:code id: tags:
``` python
plt.scalar_field(dh.cpu_arrays["c"])
```
%% Output
<matplotlib.image.AxesImage at 0x117081c10>
%% Cell type:code id: tags:
``` python
ur = ps.Assignment(c_field[0, 0], c_field[1, 0])
config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=False, skip_independence_check=True)
ast = ps.create_kernel(ur, config=config)
kernel = ast.compile()
```
%% Cell type:code id: tags:
``` python
c_sync = dh.synchronization_function_cpu(['c'])
```
%% Cell type:code id: tags:
``` python
def timeloop(steps=10):
for i in range(steps):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('video')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('image_update')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
%% Cell type:code id: tags:
``` python
def grid_update_function(image):
for i in range(40):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
animation = ps.jupyter.make_imshow_animation(dh.cpu_arrays["c"], grid_update_function, frames=300)
```
%% Output
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode("video")
ps.jupyter.set_display_mode("window")
ps.jupyter.set_display_mode("image_update")
ps.jupyter.activate_ipython()
```
%% Output
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[14], line 2
1 ps.jupyter.set_display_mode("video")
----> 2 ps.jupyter.set_display_mode("window")
3 ps.jupyter.set_display_mode("image_update")
4 ps.jupyter.activate_ipython()
File ~/pystencils/pystencils/src/pystencils/jupyter.py:115, in set_display_mode(mode)
113 display_animation_func = display_as_html_video
114 elif animation_display_mode == 'window':
--> 115 ipython.magic("matplotlib qt")
116 display_animation_func = display_in_extra_window
117 elif animation_display_mode == 'image_update':
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:2539, in InteractiveShell.magic(self, arg_s)
2537 magic_name, _, magic_arg_s = arg_s.partition(' ')
2538 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2539 return self.run_line_magic(magic_name, magic_arg_s, _stack_depth=2)
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:2417, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2415 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2416 with self.builtin_trap:
-> 2417 result = fn(*args, **kwargs)
2419 # The code below prevents the output from being displayed
2420 # when using magics with decodator @output_can_be_silenced
2421 # when the last Python token in the expression is a ';'.
2422 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/IPython/core/magics/pylab.py:99, in PylabMagics.matplotlib(self, line)
97 print("Available matplotlib backends: %s" % backends_list)
98 else:
---> 99 gui, backend = self.shell.enable_matplotlib(args.gui.lower() if isinstance(args.gui, str) else args.gui)
100 self._show_matplotlib_backend(args.gui, backend)
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/IPython/core/interactiveshell.py:3603, in InteractiveShell.enable_matplotlib(self, gui)
3599 print('Warning: Cannot change to a different GUI toolkit: %s.'
3600 ' Using %s instead.' % (gui, self.pylab_gui_select))
3601 gui, backend = pt.find_gui_and_backend(self.pylab_gui_select)
-> 3603 pt.activate_matplotlib(backend)
3604 configure_inline_support(self, backend)
3606 # Now we must activate the gui pylab wants to use, and fix %run to take
3607 # plot updates into account
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/IPython/core/pylabtools.py:360, in activate_matplotlib(backend)
355 # Due to circular imports, pyplot may be only partially initialised
356 # when this function runs.
357 # So avoid needing matplotlib attribute-lookup to access pyplot.
358 from matplotlib import pyplot as plt
--> 360 plt.switch_backend(backend)
362 plt.show._needmain = False
363 # We need to detect at runtime whether show() is called by the user.
364 # For this, we wrap it into a decorator which adds a 'called' flag.
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/matplotlib/pyplot.py:271, in switch_backend(newbackend)
268 # have to escape the switch on access logic
269 old_backend = dict.__getitem__(rcParams, 'backend')
--> 271 backend_mod = importlib.import_module(
272 cbook._backend_module_name(newbackend))
274 required_framework = _get_required_interactive_framework(backend_mod)
275 if required_framework is not None:
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1204, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:1176, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:1147, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:690, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:940, in exec_module(self, module)
File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/matplotlib/backends/backend_qt5agg.py:7
4 from .. import backends
6 backends._QT_FORCE_QT5_BINDING = True
----> 7 from .backend_qtagg import ( # noqa: F401, E402 # pylint: disable=W0611
8 _BackendQTAgg, FigureCanvasQTAgg, FigureManagerQT, NavigationToolbar2QT,
9 FigureCanvasAgg, FigureCanvasQT)
12 @_BackendQTAgg.export
13 class _BackendQT5Agg(_BackendQTAgg):
14 pass
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/matplotlib/backends/backend_qtagg.py:9
5 import ctypes
7 from matplotlib.transforms import Bbox
----> 9 from .qt_compat import QT_API, _enum
10 from .backend_agg import FigureCanvasAgg
11 from .backend_qt import QtCore, QtGui, _BackendQT, FigureCanvasQT
File /opt/local/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/matplotlib/backends/qt_compat.py:135
133 break
134 else:
--> 135 raise ImportError(
136 "Failed to import any of the following Qt binding modules: {}"
137 .format(", ".join(_ETS.values())))
138 else: # We should not get there.
139 raise AssertionError(f"Unexpected QT_API: {QT_API}")
ImportError: Failed to import any of the following Qt binding modules: PyQt6, PySide6, PyQt5, PySide2
import pytest
import numpy as np
import sympy as sp
import pystencils as ps
@pytest.mark.parametrize('dtype', ["float64", "float32"])
def test_log(dtype):
a = sp.Symbol("a")
x = ps.fields(f'x: {dtype}[1d]')
assignments = ps.AssignmentCollection({x.center(): sp.log(a)})
ast = ps.create_kernel(assignments)
code = ps.get_code_str(ast)
kernel = ast.compile()
# ps.show_code(ast)
if dtype == "float64":
assert "float" not in code
array = np.zeros((10,), dtype=dtype)
kernel(x=array, a=100)
assert np.allclose(array, 4.60517019)
import sympy as sp
import numpy as np
import sympy as sp
import pytest
import pystencils as ps
from pystencils import Field
from pystencils.cpu import create_kernel, make_python_function
import pystencils.astnodes as ast
from pystencils.field import Field, FieldType
from pystencils.astnodes import Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu import create_kernel, make_python_function
from pystencils.kernelcreation import create_staggered_kernel
from pystencils.transformations import move_constants_before_loop
import pystencils.astnodes as ast
from pystencils.transformations import simplify_conditionals, cleanup_blocks, cut_loop
from pystencils.transformations import (
cleanup_blocks, cut_loop, move_constants_before_loop, simplify_conditionals)
def offsets_in_plane(normal_plane, offset_int, dimension):
......@@ -33,9 +36,9 @@ def test_staggered_iteration():
s_arr_ref = s_arr.copy()
fields_fixed = (Field.create_from_numpy_array('f', f_arr),
Field.create_from_numpy_array('s', s_arr, index_dimensions=1))
Field.create_from_numpy_array('s', s_arr, index_dimensions=1, field_type=FieldType.STAGGERED))
fields_var = (Field.create_generic('f', 2),
Field.create_generic('s', 2, index_dimensions=1))
Field.create_generic('s', 2, index_dimensions=1, index_shape=(dim,), field_type=FieldType.STAGGERED))
for f, s in [fields_var, fields_fixed]:
# --- Manual
......@@ -47,14 +50,18 @@ def test_staggered_iteration():
sum(f[o] for o in offsets_in_plane(d, -1, dim)))
cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
eqs.append(Conditional(cond, eq))
func = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)]).compile()
# TODO: correct type hint
config = ps.CreateKernelConfig(target=ps.Target.CPU, ghost_layers=[(1, 0), (1, 0), (1, 0)])
func = ps.create_kernel(eqs, config=config).compile()
# --- Built-in optimized
expressions = []
for d in range(dim):
expressions.append(sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
sum(f[o] for o in offsets_in_plane(d, -1, dim)))
func_optimized = create_staggered_kernel(s, expressions).compile()
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
func_optimized = create_staggered_kernel(assignments).compile()
pytest.importorskip('islpy')
assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"
func(f=f_arr, s=s_arr_ref)
......@@ -69,12 +76,13 @@ def test_staggered_iteration_manual():
s_arr_ref = s_arr.copy()
f = Field.create_from_numpy_array('f', f_arr)
s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)
s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1, field_type=FieldType.STAGGERED)
eqs = []
counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
conditions = [counters[i] < f.shape[i] - 1 for i in range(dim)]
conditions2 = counters[0] > f.shape[0] + 5
for d in range(dim):
eq = SympyAssignment(s(d), sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
......@@ -82,7 +90,13 @@ def test_staggered_iteration_manual():
cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
eqs.append(Conditional(cond, eq))
kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])
# this conditional should vanish entirely because it is never true
eq = SympyAssignment(s(0), f[0, 0])
cond = sp.And(*[conditions2])
eqs.append(Conditional(cond, eq))
config = ps.CreateKernelConfig(target=ps.Target.CPU, ghost_layers=[(1, 0), (1, 0), (1, 0)])
kernel_ast = ps.create_kernel(eqs, config=config)
func = make_python_function(kernel_ast)
func(f=f_arr, s=s_arr_ref)
......@@ -97,6 +111,7 @@ def test_staggered_iteration_manual():
move_constants_before_loop(kernel_ast.body)
cleanup_blocks(kernel_ast.body)
pytest.importorskip('islpy')
assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work"
func_optimized = make_python_function(kernel_ast)
......@@ -106,11 +121,14 @@ def test_staggered_iteration_manual():
def test_staggered_gpu():
dim = 2
f, s = ps.fields("f, s({dim}): double[{dim}D]".format(dim=dim))
f = ps.fields(f"f: double[{dim}D]")
s = ps.fields("s({dim}): double[{dim}D]".format(dim=dim), field_type=FieldType.STAGGERED)
expressions = [(f[0, 0] + f[-1, 0]) / 2,
(f[0, 0] + f[0, -1]) / 2]
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=True)
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target=ps.Target.GPU, gpu_exclusive_conditions=True)
assert len(kernel_ast.atoms(Conditional)) == 4
kernel_ast = ps.create_staggered_kernel(s, expressions, target='gpu', gpu_exclusive_conditions=False)
assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
kernel_ast = ps.create_staggered_kernel(assignments, target=ps.Target.GPU, gpu_exclusive_conditions=False)
assert len(kernel_ast.atoms(Conditional)) == 3
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import sympy as sp
import pystencils
from pystencils.typing import TypedSymbol, BasicType
def test_wild_typed_symbol():
x = pystencils.fields('x: float32[3d]')
typed_symbol = TypedSymbol('a', BasicType('float64'))
assert x.center().match(sp.Wild('w1'))
assert typed_symbol.match(sp.Wild('w1'))
wild_ceiling = sp.ceiling(sp.Wild('w1'))
assert sp.ceiling(x.center()).match(wild_ceiling)
assert sp.ceiling(typed_symbol).match(wild_ceiling)
def test_replace_and_subs_for_assignment_collection():
x, y = pystencils.fields('x, y: float32[3d]')
a, b, c, d = sp.symbols('a, b, c, d')
assignments = pystencils.AssignmentCollection({
a: sp.floor(1),
b: 2,
c: a + c,
y.center(): sp.ceiling(x.center()) + sp.floor(x.center())
})
expected_assignments = pystencils.AssignmentCollection({
a: sp.floor(3),
b: 2,
c: a + c,
y.center(): sp.ceiling(x.center()) + sp.floor(x.center())
})
assert expected_assignments == assignments.replace(1, 3)
assert expected_assignments == assignments.subs({1: 3})
expected_assignments = pystencils.AssignmentCollection({
d: sp.floor(1),
b: 2,
c: d + c,
y.center(): sp.ceiling(x.center()) + sp.floor(x.center())
})
print(expected_assignments)
print(assignments.subs(a, d))
assert expected_assignments == assignments.subs(a, d)
def test_match_for_assignment_collection():
x, y = pystencils.fields('x, y: float32[3d]')
a, b, c, d = sp.symbols('a, b, c, d')
assignments = pystencils.AssignmentCollection({
a: sp.floor(1),
b: 2,
c: a + c,
y.center(): sp.ceiling(x.center()) + sp.floor(x.center())
})
w1 = sp.Wild('w1')
w2 = sp.Wild('w2')
w3 = sp.Wild('w3')
wild_ceiling = sp.ceiling(w1)
wild_addition = w1 + w2
assert assignments.match(pystencils.Assignment(w3, wild_ceiling + w2))[w1] == x.center()
assert assignments.match(pystencils.Assignment(w3, wild_ceiling + w2)) == {
w3: y.center(),
w2: sp.floor(x.center()),
w1: x.center()
}
assert assignments.find(wild_ceiling) == {sp.ceiling(x.center())}
assert len([a for a in assignments.find(wild_addition) if isinstance(a, sp.Add)]) == 2
import pytest
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils.fast_approximation import fast_division
@pytest.mark.parametrize('dtype', ["float64", "float32"])
@pytest.mark.parametrize('func', [sp.Pow, sp.atan2])
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_two_arguments(dtype, func, target):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
dh = ps.create_data_handling(domain_size=(10, 10), periodicity=True, default_target=target)
x = dh.add_array('x', values_per_cell=1, dtype=dtype)
dh.fill("x", 0.0, ghost_layers=True)
y = dh.add_array('y', values_per_cell=1, dtype=dtype)
dh.fill("y", 1.0, ghost_layers=True)
z = dh.add_array('z', values_per_cell=1, dtype=dtype)
dh.fill("z", 2.0, ghost_layers=True)
config = ps.CreateKernelConfig(target=target)
# test sp.Max with one argument
up = ps.Assignment(x.center, func(y.center, z.center))
ast = ps.create_kernel(up, config=config)
code = ps.get_code_str(ast)
if dtype == 'float32':
assert func.__name__.lower() in code
kernel = ast.compile()
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()),
13 if dtype == 'float64' else 5)
@pytest.mark.parametrize('dtype', ["float64", "float32"])
@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan, sp.floor, sp.ceiling])
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_single_arguments(dtype, func, target):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
dh = ps.create_data_handling(domain_size=(10, 10), periodicity=True, default_target=target)
x = dh.add_array('x', values_per_cell=1, dtype=dtype)
dh.fill("x", 0.0, ghost_layers=True)
y = dh.add_array('y', values_per_cell=1, dtype=dtype)
dh.fill("y", 1.0, ghost_layers=True)
config = ps.CreateKernelConfig(target=target)
# test sp.Max with one argument
up = ps.Assignment(x.center, func(y.center))
ast = ps.create_kernel(up, config=config)
code = ps.get_code_str(ast)
if dtype == 'float32':
func_name = func.__name__.lower() if func is not sp.ceiling else "ceil"
assert func_name in code
kernel = ast.compile()
dh.all_to_gpu()
dh.run_kernel(kernel)
dh.all_to_cpu()
np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0).evalf()),
rtol=10**-3 if dtype == 'float32' else 10**-5)
@pytest.mark.parametrize('a', [sp.Symbol('a'), ps.fields('a: float64[2d]').center])
def test_avoid_pow(a):
x = ps.fields('x: float64[2d]')
up = ps.Assignment(x.center_vector[0], 2 * a ** 2 / 3)
ast = ps.create_kernel(up)
code = ps.get_code_str(ast)
assert "pow" not in code
def test_avoid_pow_fast_div():
x = ps.fields('x: float64[2d]')
a = ps.fields('a: float64[2d]').center
up = ps.Assignment(x.center_vector[0], fast_division(1, (a**2)))
ast = ps.create_kernel(up, config=ps.CreateKernelConfig(target=ps.Target.GPU))
# ps.show_code(ast)
code = ps.get_code_str(ast)
assert "pow" not in code
def test_avoid_pow_move_constants():
# At the end of the kernel creation the function move_constants_before_loop will be called
# This function additionally contains substitutions for symbols with the same value
# Thus it simplifies the equations again
x = ps.fields('x: float64[2d]')
a, b, c = sp.symbols("a, b, c")
up = [ps.Assignment(a, 0.0),
ps.Assignment(b, 0.0),
ps.Assignment(c, 0.0),
ps.Assignment(x.center_vector[0], a**2/18 - a*b/6 - a/18 + b**2/18 + b/18 - c**2/36)]
ast = ps.create_kernel(up)
code = ps.get_code_str(ast)
ps.show_code(ast)
assert "pow" not in code
import pytest
import numpy as np
import sympy as sp
import pystencils as ps
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
SLICE_LIST = [False,
ps.make_slice[1:-1:2, 1:-1:2],
ps.make_slice[2:-1:2, 4:-1:7],
ps.make_slice[4:-1:2, 5:-1:2],
ps.make_slice[3:-1:4, 7:-1:3]]
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
@pytest.mark.parametrize('iteration_slice', SLICE_LIST)
def test_mod(target, iteration_slice):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
dh = ps.create_data_handling(domain_size=(51, 51), periodicity=True, default_target=target)
loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)]
field = dh.add_array("a", values_per_cell=1)
eq_list = [SympyAssignment(field.center, 1.0)]
if iteration_slice:
config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice)
assign = eq_list
else:
assign = [Conditional(sp.And(*cond), Block(eq_list))]
config = ps.CreateKernelConfig(target=dh.default_target)
kernel = ps.create_kernel(assign, config=config).compile()
dh.fill(field.name, 0, ghost_layers=True)
if config.target == ps.enums.Target.GPU:
dh.to_gpu(field.name)
dh.run_kernel(kernel)
if config.target == ps.enums.Target.GPU:
dh.to_cpu(field.name)
result = dh.gather_array(field.name, ghost_layers=True)
assert np.all(result[iteration_slice] == 1.0)
import numpy as np
import pystencils as ps
from pystencils.astnodes import Block, LoopOverCoordinate, SympyAssignment, TypedSymbol
from pystencils.transformations import move_constants_before_loop
def test_symbol_renaming():
"""When two loops have assignments to the same symbol with different rhs and both
are pulled before the loops, one of them has to be renamed
"""
f, g = ps.fields("f, g : double[2D]")
a, b, c = [TypedSymbol(n, np.float64) for n in ('a', 'b', 'c')]
loop1 = LoopOverCoordinate(Block([SympyAssignment(c, a + b),
SympyAssignment(g[0, 0], f[0, 0] + c)]),
0, 0, 10)
loop2 = LoopOverCoordinate(Block([SympyAssignment(c, a ** 2 + b ** 2),
SympyAssignment(g[0, 0], f[0, 0] + c)]),
0, 0, 10)
block = Block([loop1, loop2])
move_constants_before_loop(block)
loops = block.atoms(LoopOverCoordinate)
assert len(loops) == 2
assert len(block.args[1].body.args) == 1
assert len(block.args[3].body.args) == 2
for loop in loops:
assert len(loop.parent.args) == 4 # 2 loops + 2 subexpressions
assert loop.parent.args[0].lhs.name != loop.parent.args[2].lhs.name
def test_keep_order_of_accesses():
f = ps.fields("f: [1D]")
x = TypedSymbol("x", np.float64)
n = 5
loop = LoopOverCoordinate(Block([SympyAssignment(x, f[0]),
SympyAssignment(f[1], 2 * x)]),
0, 0, n)
block = Block([loop])
ps.transformations.resolve_field_accesses(block)
new_loops = ps.transformations.cut_loop(loop, [n - 1])
ps.transformations.move_constants_before_loop(new_loops.args[1])
kernel_func = ps.astnodes.KernelFunction(
block, ps.Target.CPU, ps.Backend.C, ps.cpu.cpujit.make_python_function, None
)
kernel = kernel_func.compile()
print(ps.show_code(kernel_func))
f_arr = np.ones(n + 1)
kernel(f=f_arr)
print(f_arr)
assert np.allclose(f_arr, np.array([
1, 2, 4, 8, 16, 32
]))
import sympy as sp
from pystencils import AssignmentCollection, Assignment
from pystencils.node_collection import NodeCollection
from pystencils.astnodes import SympyAssignment
def test_node_collection_from_assignment_collection():
x = sp.symbols('x')
assignment_collection = AssignmentCollection([Assignment(x, 2)])
node_collection = NodeCollection.from_assignment_collection(assignment_collection)
assert node_collection.all_assignments[0] == SympyAssignment(x, 2)
import io
import json
from http.server import HTTPServer, BaseHTTPRequestHandler
from http.server import BaseHTTPRequestHandler, HTTPServer
from tempfile import TemporaryDirectory
from pystencils.runhelper import ParameterStudy
......
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('cupy')
```
%% Output
<module 'cupy' from '/home/markus/Python311/lib/python3.11/site-packages/cupy/__init__.py'>
%% Cell type:code id: tags:
``` python
from pystencils.session import *
sp.init_printing()
frac = sp.Rational
```
%% Cell type:markdown id: tags:
# Phase-field simulation of dentritic solidification in 3D
This notebook tests the model presented in the dentritic growth tutorial in 3D.
%% Cell type:code id: tags:
``` python
target = ps.Target.GPU
gpu = target == ps.Target.GPU
domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
φ_field = dh.add_array('phi', latex_name='φ')
φ_field_tmp = dh.add_array('phi_tmp', latex_name='φ_tmp')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
t_field_tmp = dh.add_array('T_tmp')
```
%% Cell type:code id: tags:
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
φ = φ_field.center
T = t_field.center
d = ps.fd.Diff
def f(φ, m):
return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
bulk_free_energy_density = f(φ, m)
interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
```
%% Cell type:markdown id: tags:
Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
%% Cell type:code id: tags:
``` python
n = sp.Matrix([d(φ, i) for i in range(3)])
nLen = sp.sqrt(sum(n_i**2 for n_i in n))
n = n / nLen
nVal = sum(n_i**4 for n_i in n)
σ = δ * nVal
εVal = εb * (1 + σ)
εVal
```
%% Output
$\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}} + \frac{{\partial_{1} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}} + \frac{{\partial_{2} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}}\right) + 1\right)$
⎛ ⎛ 4
⎜ ⎜ D(φ[0,0,0])
\bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
⎜ ⎜ 2
⎜ ⎜⎛ 2 2 2⎞ ⎛
⎝ ⎝⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]
4 4
D(φ[0,0,0]) D(φ[0,0,0])
────────────────────────────────── + ─────────────────────────────────────────
2
2 2 2⎞ ⎛ 2 2
) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]
⎞ ⎞
⎟ ⎟
────⎟ + 1⎟
2⎟ ⎟
2⎞ ⎟ ⎟
) ⎠ ⎠ ⎠
%% Cell type:code id: tags:
``` python
def m_func(temperature):
return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
```
%% Cell type:code id: tags:
``` python
substitutions = {m: m_func(T),
ε: εVal}
fe_i = interface_free_energy_density.subs(substitutions)
fe_b = bulk_free_energy_density.subs(substitutions)
μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
```
%% Cell type:code id: tags:
``` python
dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
```
%% Cell type:code id: tags:
``` python
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.3,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
%% Output
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.3, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
%% Cell type:code id: tags:
``` python
dφ_dt = - dF_dφ / τ
assignments = [
ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
]
φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
φEqs.append(ps.Assignment(φ_field_tmp.center, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperatureEqs = [
ps.Assignment(t_field_tmp.center, discretize(temperatureEvolution.subs(parameters)))
]
```
%% Cell type:code id: tags:
``` python
φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
```
%% Cell type:code id: tags:
``` python
def time_loop(steps):
φ_sync = dh.synchronization_function(['phi'], target=target)
temperature_sync = dh.synchronization_function(['T'], target=target)
dh.all_to_gpu()
for t in range(steps):
φ_sync()
dh.run_kernel(φ_kernel)
temperature_sync()
dh.run_kernel(temperatureKernel)
dh.swap(φ_field.name, φ_field_tmp.name)
dh.swap(t_field.name, t_field_tmp.name)
dh.all_to_cpu()
def init(nucleus_size=np.sqrt(5)):
for b in dh.iterate():
x, y, z = b.cell_index_arrays
x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
b['phi'].fill(0)
b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
b['T'].fill(0.0)
def plot(slice_obj=ps.make_slice[:, :, 0.5]):
plt.subplot(1, 3, 1)
plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
plt.title("φ")
plt.colorbar()
plt.subplot(1, 3, 2)
plt.title("T")
plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
plt.colorbar()
plt.subplot(1, 3, 3)
plt.title("∂φ")
plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
plt.colorbar()
```
%% Cell type:code id: tags:
``` python
init()
plot()
print(dh)
```
%% Output
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
T_tmp| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phi_tmp| ( 0, 0)| ( 0, 0)
phidelta| ( 0, 0)| ( 0, 0)
%% Cell type:code id: tags:
``` python
if 'is_test_run' in globals():
time_loop(2)
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
assert np.isfinite(dh.max('phidelta'))
else:
from time import perf_counter
vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
last = perf_counter()
for i in range(4):
time_loop(100)
vtk_writer(i)
print("Step ", i, perf_counter() - last, dh.max('phi'))
last = perf_counter()
```
%% Output
Step 0 19.713090835999992 1.0
Step 1 19.673075279000045 1.0
Step 2 19.696444219 1.0
Step 3 19.752472744999977 1.0
from pystencils import Field, TypedSymbol
from copy import copy, deepcopy
from pystencils.field import Field
from pystencils.typing import TypedSymbol
def test_field_access():
field = Field.create_generic('some_field', spatial_dimensions=2, index_dimensions=0)
......
import os
from tempfile import TemporaryDirectory
import shutil
import pytest
import numpy as np
import pystencils.plot as plt
def example_scalar_field(t=0):
x, y = np.meshgrid(np.linspace(0, 2 * np.pi, 100), np.linspace(0, 2 * np.pi, 100))
z = np.cos(x + 0.1 * t) * np.sin(y + 0.1 * t) + 0.1 * x * y
return z
def example_vector_field(t=0, shape=(40, 40)):
result = np.empty(shape + (2,))
x, y = np.meshgrid(np.linspace(0, 2 * np.pi, shape[0]), np.linspace(0, 2 * np.pi, shape[1]))
result[..., 0] = np.cos(x + 0.1 * t) * np.sin(y + 0.1 * t) + 0.01 * x * y
result[..., 1] = np.cos(0.001 * y)
return result
@pytest.mark.skipif(shutil.which('ffmpeg') is None, reason="ffmpeg not available")
def test_animation():
t = 0
def run_scalar():
nonlocal t
t += 1
return example_scalar_field(t)
def run_vec():
nonlocal t
t += 1
return example_vector_field(t)
plt.clf()
plt.cla()
with TemporaryDirectory() as tmp_dir:
ani = plt.vector_field_magnitude_animation(run_vec, interval=1, frames=2)
ani.save(os.path.join(tmp_dir, "animation1.avi"))
ani = plt.vector_field_animation(run_vec, interval=1, frames=2, rescale=True)
ani.save(os.path.join(tmp_dir, "animation2.avi"))
ani = plt.vector_field_animation(run_vec, interval=1, frames=2, rescale=False)
ani.save(os.path.join(tmp_dir, "animation3.avi"))
ani = plt.scalar_field_animation(run_scalar, interval=1, frames=2, rescale=True)
ani.save(os.path.join(tmp_dir, "animation4.avi"))
ani = plt.scalar_field_animation(run_scalar, interval=1, frames=2, rescale=False)
ani.save(os.path.join(tmp_dir, "animation5.avi"))
ani = plt.surface_plot_animation(run_scalar, frames=2)
ani.save(os.path.join(tmp_dir, "animation6.avi"))
import pytest
import re
import sympy as sp
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
def __init__(self):
super().__init__()
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_infinity(type, negative, target):
x = pystencils.fields(f'x: {type}[1d]')
if negative:
assignment = pystencils.Assignment(x.center, -sp.oo)
else:
assignment = pystencils.Assignment(x.center, sp.oo)
ast = pystencils.create_kernel(assignment, data_type=type, target=target)
if target == pystencils.Target.GPU:
pytest.importorskip('cupy')
ast.compile()
print(ast.compile().code)
def test_print_unsupported_node():
with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
CBackend()(UnsupportedNode())
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_subtraction(dtype, target):
a, b, c = sp.symbols("a b c")
x = pystencils.fields(f'x: {dtype}[3d]')
y = pystencils.fields(f'y: {dtype}[3d]')
config = pystencils.CreateKernelConfig(target=target, data_type=dtype)
update = pystencils.Assignment(x.center, y.center - a * b ** 8 + b * -1 / 42.0 - 2 * c ** 4)
ast = pystencils.create_kernel(update, config=config)
code = pystencils.get_code_str(ast)
assert "-1.0" not in code
def test_print_small_integer_pow():
printer = pystencils.backends.cbackend.CBackend()
x = sp.Symbol("x")
y = sp.Symbol("y")
n = pystencils.TypedSymbol("n", "int")
t = pystencils.TypedSymbol("t", "float32")
s = pystencils.TypedSymbol("s", "float32")
equs = [
pystencils.astnodes.SympyAssignment(y, 1/x),
pystencils.astnodes.SympyAssignment(y, x*x),
pystencils.astnodes.SympyAssignment(y, 1/(x*x)),
pystencils.astnodes.SympyAssignment(y, x**8),
pystencils.astnodes.SympyAssignment(y, x**(-8)),
pystencils.astnodes.SympyAssignment(y, x**9),
pystencils.astnodes.SympyAssignment(y, x**(-9)),
pystencils.astnodes.SympyAssignment(y, x**n),
pystencils.astnodes.SympyAssignment(y, sp.Pow(4, 4, evaluate=False)),
pystencils.astnodes.SympyAssignment(y, x**0.25),
pystencils.astnodes.SympyAssignment(y, x**y),
pystencils.astnodes.SympyAssignment(y, pystencils.typing.cast_functions.CastFunc(1/x, "float32")),
pystencils.astnodes.SympyAssignment(y, pystencils.typing.cast_functions.CastFunc(x*x, "float32")),
pystencils.astnodes.SympyAssignment(y, (t+s)**(-8)),
pystencils.astnodes.SympyAssignment(y, (t+s)**(-9)),
]
typed = pystencils.typing.transformations.add_types(equs, pystencils.CreateKernelConfig())
regexes = [
r"1\.0\s*/\s*\(?\s*x\s*\)?",
r"x\s*\*\s*x",
r"1\.0\s*/\s*\(\s*x\s*\*x\s*\)",
r"x(\s*\*\s*x){7}",
r"1\.0\s*/\s*\(\s*x(\s*\*\s*x){7}\s*\)",
r"pow\(\s*x\s*,\s*9(\.0)?\s*\)",
r"pow\(\s*x\s*,\s*-9(\.0)?\s*\)",
r"pow\(\s*x\s*,\s*\(?\s*\(\s*double\s*\)\s*\(\s*n\s*\)\s*\)?\s*\)",
r"\(\s*int[a-zA-Z0-9_]*\s*\)\s*\(+\s*4(\s*\*\s*4){3}\s*\)+",
r"pow\(\s*x\s*,\s*0\.25\s*\)",
r"pow\(\s*x\s*,\s*y\s*\)",
r"\(\s*float\s*\)[ ()]*1\.0\s*/\s*\(?\s*x\s*\)?",
r"\(\s*float\s*\)[ ()]*x\s*\*\s*x",
r"\(\s*float\s*\)\s*\(\s*1\.0f\s*/\s*\(\s*\(\s*s\s*\+\s*t\s*\)(\s*\*\s*\(\s*s\s*\+\s*t\s*\)){7}\s*\)",
r"powf\(\s*s\s*\+\s*t\s*,\s*-9\.0f\s*\)",
]
for r, e in zip(regexes, typed):
assert re.search(r, printer(e))
import numpy as np
import pystencils as ps
from pystencils.cpu.vectorization import get_supported_instruction_sets
from pystencils.cpu.vectorization import replace_inner_stride_with_one, vectorize
def test_basic_kernel():
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = ps.create_data_handling(domain_size=domain_shape, periodicity=True)
assert all(dh.periodicity)
f = dh.add_array('f', values_per_cell=1)
tmp = dh.add_array('tmp', values_per_cell=1)
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
stencil = stencil_2d if dh.dim == 2 else stencil_3d
jacobi = ps.Assignment(tmp.center, sum(f.neighbors(stencil)) / len(stencil))
kernel = ps.create_kernel(jacobi).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
for b in dh.iterate(ghost_layers=0):
np.testing.assert_equal(b['f'], 42)
float_seq = [1.0, 2.0, 3.0, 4.0]
int_seq = [1, 2, 3]
for op in ('min', 'max', 'sum'):
assert (dh.reduce_float_sequence(float_seq, op) == float_seq).all()
assert (dh.reduce_int_sequence(int_seq, op) == int_seq).all()
def test_basic_blocking_staggered():
f = ps.fields("f: double[2D]")
stag = ps.fields("stag(2): double[2D]", field_type=ps.FieldType.STAGGERED)
terms = [
f[0, 0] - f[-1, 0],
f[0, 0] - f[0, -1],
]
assignments = [ps.Assignment(stag.staggered_access(d), terms[i]) for i, d in enumerate(stag.staggered_stencil)]
kernel = ps.create_staggered_kernel(assignments, cpu_blocking=(3, 16)).compile()
reference_kernel = ps.create_staggered_kernel(assignments).compile()
f_arr = np.random.rand(80, 33)
stag_arr = np.zeros((80, 33, 3))
stag_ref = np.zeros((80, 33, 3))
kernel(f=f_arr, stag=stag_arr)
reference_kernel(f=f_arr, stag=stag_ref)
np.testing.assert_almost_equal(stag_arr, stag_ref)
def test_basic_vectorization():
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
instruction_set = supported_instruction_sets[-1]
else:
instruction_set = None
f, g = ps.fields("f, g : double[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
ast = ps.create_kernel(update_rule)
replace_inner_stride_with_one(ast)
vectorize(ast, instruction_set=instruction_set)
func = ast.compile()
arr = np.ones((23 + 2, 17 + 2)) * 5.0
dst = np.zeros_like(arr)
func(g=dst, f=arr)
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
\ No newline at end of file
import numpy as np
import pytest
import pystencils as ps
from pystencils.astnodes import SympyAssignment
from pystencils.node_collection import NodeCollection
from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles, random_symbol
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.cpujit import get_compiler_config
from pystencils.typing import TypedSymbol
from pystencils.enums import Target
RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles,
('aesni', 'float'): AESNIFourFloats, ('aesni', 'double'): AESNITwoDoubles}
instruction_sets = get_supported_instruction_sets()
if get_compiler_config()['os'] == 'windows':
# skip instruction sets supported by the CPU but not by the compiler
if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower()
and '/arch:avx512' not in get_compiler_config()['flags'].lower()):
instruction_sets.remove('avx')
if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512')
if 'avx512vl' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512vl')
@pytest.mark.parametrize('target, rng', ((Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox')))
@pytest.mark.parametrize('precision', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
if target == Target.GPU:
pytest.importorskip('cupy')
if instruction_sets and {'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
if rng == 'aesni' and len(keys) == 2:
keys *= 2
if offset_values is None:
offset_values = offsets
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
dtype=np.float32 if dtype == 'float' else np.float64)
dh.fill(f.name, 42.0)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets, keys=keys)
assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
kwargs = {'time_step': t}
if offset_values != offsets:
kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
dh.run_kernel(kernel, **kwargs)
dh.all_to_cpu()
arr = dh.gather_array(f.name)
assert np.logical_and(arr <= 1.0, arr >= 0).all()
if rng == 'philox' and t == 124 and offsets == (0, 0) and keys == (0, 0) and dh.shape == (2, 2):
int_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
[1032407765, 970978240, 2217005168, 2424826293]],
[[2958765206, 3725192638, 2623672781, 1373196132],
[850605163, 1694561295, 3285694973, 2799652583]]])
else:
pytest.importorskip('randomgen')
if rng == 'aesni':
from randomgen import AESCounter
int_reference = np.empty(dh.shape + (4,), dtype=int)
for x in range(dh.shape[0]):
for y in range(dh.shape[1]):
r = AESCounter(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64,
key=keys[0] + keys[1] * 2 ** 32 + keys[2] * 2 ** 64 + keys[3] * 2 ** 96,
mode="sequence")
a, b = r.random_raw(size=2)
int_reference[x, y, :] = [a % 2 ** 32, a // 2 ** 32, b % 2 ** 32, b // 2 ** 32]
else:
from randomgen import Philox
int_reference = np.empty(dh.shape + (4,), dtype=int)
for x in range(dh.shape[0]):
for y in range(dh.shape[1]):
r = Philox(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64 - 1,
key=keys[0] + keys[1] * 2 ** 32, number=4, width=32, mode="sequence")
int_reference[x, y, :] = r.random_raw(size=4)
if precision == 'float' or dtype == 'float':
eps = np.finfo(np.float32).eps
else:
eps = np.finfo(np.float64).eps
if rng == 'aesni': # precision appears to be slightly worse
eps = max(1e-12, 2 * eps)
if precision == 'float':
reference = int_reference * 2. ** -32 + 2. ** -33
else:
x = int_reference[:, :, 0::2]
y = int_reference[:, :, 1::2]
z = x ^ y << (53 - 32)
reference = z * 2. ** -53 + 2. ** -54
assert np.allclose(arr, reference, rtol=0, atol=eps)
@pytest.mark.parametrize('vectorized', (False, True))
@pytest.mark.parametrize('kind', ('value', 'symbol'))
def test_rng_offsets(kind, vectorized):
if vectorized:
test = test_rng_vectorized
if not instruction_sets:
pytest.skip("cannot detect CPU instruction set")
else:
test = test_rng
if kind == 'value':
test(instruction_sets[-1] if vectorized else Target.CPU, 'philox', 'float', 'float', t=8,
offsets=(6, 7), keys=(5, 309))
elif kind == 'symbol':
offsets = (TypedSymbol("x0", np.uint32), TypedSymbol("y0", np.uint32))
test(instruction_sets[-1] if vectorized else Target.GPU, 'philox', 'float', 'float', t=8,
offsets=offsets, offset_values=(6, 7), keys=(5, 309))
@pytest.mark.parametrize('target', instruction_sets)
@pytest.mark.parametrize('rng', ('philox', 'aesni'))
@pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
if (target in ['neon', 'vsx', 'rvv', 'sme'] or target.startswith('sve')) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}
dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
dtype=np.float32 if dtype == 'float' else np.float64, alignment=True)
dh.fill(f.name, 42.0)
ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [SympyAssignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
kwargs = {'time_step': t}
if offset_values is not None:
kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
dh.run_kernel(kernel, **kwargs)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
dh.run_kernel(kernel, **kwargs)
ref_data = dh.gather_array(ref.name)
data = dh.gather_array(f.name)
assert np.allclose(ref_data, data)
@pytest.mark.parametrize('vectorized', (False, True))
def test_rng_symbol(vectorized):
"""Make sure that the RNG symbol generator generates symbols and that the resulting code compiles"""
cpu_vectorize_info = None
if vectorized:
if not instruction_sets:
pytest.skip("cannot detect CPU instruction set")
else:
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True,
'instruction_set': instruction_sets[-1]}
dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
nc = NodeCollection([SympyAssignment(f(i), 0) for i in range(f.shape[-1])])
subexpressions = []
rng_symbol_gen = random_symbol(subexpressions, dim=dh.dim)
for i in range(f.shape[-1]):
nc.all_assignments[i] = SympyAssignment(nc.all_assignments[i].lhs, next(rng_symbol_gen))
symbols = [a.rhs for a in nc.all_assignments]
[nc.all_assignments.insert(0, subexpression) for subexpression in subexpressions]
assert len(symbols) == f.shape[-1] and len(set(symbols)) == f.shape[-1]
ps.create_kernel(nc, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
@pytest.mark.parametrize('vectorized', (False, True))
def test_staggered(vectorized):
"""Make sure that the RNG counter can be substituted during loop cutting"""
dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)
a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil])
rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
a.main_assignments[0] = ps.Assignment(a.main_assignments[0].lhs, next(rng_symbol_gen))
kernel = ps.create_staggered_kernel(a, target=dh.default_target).compile()
if not vectorized:
return
if not instruction_sets:
pytest.skip("cannot detect CPU instruction set")
pytest.importorskip('islpy')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': False,
'instruction_set': instruction_sets[-1]}
dh.fill(j.name, 867)
dh.run_kernel(kernel, seed=5, time_step=309)
ref_data = dh.gather_array(j.name)
kernel2 = ps.create_staggered_kernel(a, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
dh.fill(j.name, 867)
dh.run_kernel(kernel2, seed=5, time_step=309)
data = dh.gather_array(j.name)
assert np.allclose(ref_data, data)
from pystencils.cache import sharedmethodcache
class Fib:
def __init__(self):
self.fib_rec_called = 0
self.fib_iter_called = 0
@sharedmethodcache("fib_cache")
def fib_rec(self, n):
self.fib_rec_called += 1
return 1 if n <= 1 else self.fib_rec(n-1) + self.fib_rec(n-2)
@sharedmethodcache("fib_cache")
def fib_iter(self, n):
self.fib_iter_called += 1
f1, f2 = 0, 1
for i in range(n):
f2 = f1 + f2
f1 = f2 - f1
return f2
def test_fib_memoization_1():
fib = Fib()
assert "fib_cache" not in fib.__dict__
f13 = fib.fib_rec(13)
assert fib.fib_rec_called == 14
assert "fib_cache" in fib.__dict__
assert fib.fib_cache[(13,)] == f13
for k in range(14):
# fib_iter should use cached results from fib_rec
fib.fib_iter(k)
assert fib.fib_iter_called == 0
def test_fib_memoization_2():
fib = Fib()
f11 = fib.fib_iter(11)
f12 = fib.fib_iter(12)
assert fib.fib_iter_called == 2
f13 = fib.fib_rec(13)
# recursive calls should be cached
assert fib.fib_rec_called == 1
class Triad:
def __init__(self):
self.triad_called = 0
@sharedmethodcache("triad_cache")
def triad(self, a, b, c=0):
"""Computes the triad a*b+c."""
self.triad_called += 1
return a * b + c
def test_triad_memoization():
triad = Triad()
assert triad.triad.__doc__ == "Computes the triad a*b+c."
t = triad.triad(12, 4, 15)
assert triad.triad_called == 1
assert triad.triad_cache[(12, 4, 15)] == t
t = triad.triad(12, 4, c=15)
assert triad.triad_called == 2
assert triad.triad_cache[(12, 4, 'c', 15)] == t
t = triad.triad(12, 4, 15)
assert triad.triad_called == 2
t = triad.triad(12, 4, c=15)
assert triad.triad_called == 2
import sympy as sp
import pystencils as ps
from pystencils import Assignment, AssignmentCollection
from pystencils.simp import SimplificationStrategy, apply_on_all_subexpressions, \
subexpression_substitution_in_existing_subexpressions
from pystencils.simp import (
SimplificationStrategy, apply_on_all_subexpressions,
subexpression_substitution_in_existing_subexpressions)
def test_simplification_strategy():
......@@ -27,7 +30,7 @@ def test_simplification_strategy():
result = strategy(ac)
assert result.operation_count['adds'] == 7
assert result.operation_count['muls'] == 5
assert result.operation_count['muls'] == 4
assert result.operation_count['divs'] == 0
# Trigger display routines, such that they are at least executed
......@@ -41,3 +44,45 @@ def test_simplification_strategy():
assert 'Adds' in report._repr_html_()
assert 'factor' in str(strategy)
def test_split_inner_loop():
dst = ps.fields('dst(8): double[2D]')
s = sp.symbols('s_:8')
x = sp.symbols('x')
subexpressions = []
main = [
Assignment(dst[0, 0](0), s[0]),
Assignment(dst[0, 0](1), s[1]),
Assignment(dst[0, 0](2), s[2]),
Assignment(dst[0, 0](3), s[3]),
Assignment(dst[0, 0](4), s[4]),
Assignment(dst[0, 0](5), s[5]),
Assignment(dst[0, 0](6), s[6]),
Assignment(dst[0, 0](7), s[7]),
Assignment(x, sum(s))
]
ac = AssignmentCollection(main, subexpressions)
split_groups = [[dst[0, 0](0), dst[0, 0](1)],
[dst[0, 0](2), dst[0, 0](3)],
[dst[0, 0](4), dst[0, 0](5)],
[dst[0, 0](6), dst[0, 0](7), x]]
ac.simplification_hints['split_groups'] = split_groups
ast = ps.create_kernel(ac)
code = ps.get_code_str(ast)
ps.show_code(ast)
# we have four inner loops as indicated in split groups (4 elements) plus one outer loop
assert code.count('for') == 5
ast = ps.create_kernel(ac, target=ps.Target.GPU)
code = ps.get_code_str(ast)
# on GPUs is wouldn't be good to use loop splitting
assert code.count('for') == 0
ac = AssignmentCollection(main, subexpressions)
ast = ps.create_kernel(ac)
code = ps.get_code_str(ast)
# one inner loop and one outer loop
assert code.count('for') == 2
from sys import version_info as vs
import pytest
import pystencils.config
import sympy as sp
import pystencils as ps
from pystencils import Assignment, AssignmentCollection, fields
from pystencils.simp import subexpression_substitution_in_main_assignments
from pystencils.simp import add_subexpressions_for_divisions
from pystencils.simp import add_subexpressions_for_sums
from pystencils.simp import add_subexpressions_for_field_reads
from pystencils.simp.simplifications import add_subexpressions_for_constants
from pystencils.typing import BasicType, TypedSymbol
a, b, c, d, x, y, z = sp.symbols("a b c d x y z")
s0, s1, s2, s3 = sp.symbols("s_:4")
f = sp.symbols("f_:9")
def test_subexpression_substitution_in_main_assignments():
subexpressions = [
Assignment(s0, 2 * a + 2 * b),
Assignment(s1, 2 * a + 2 * b + 2 * c),
Assignment(s2, 2 * a + 2 * b + 2 * c + 2 * d),
Assignment(s3, 2 * a + 2 * b * c),
Assignment(x, s1 + s2 + s0 + s3)
]
main = [
Assignment(f[0], s1 + s2 + s0 + s3),
Assignment(f[1], s1 + s2 + s0 + s3),
Assignment(f[2], s1 + s2 + s0 + s3),
Assignment(f[3], s1 + s2 + s0 + s3),
Assignment(f[4], s1 + s2 + s0 + s3)
]
ac = AssignmentCollection(main, subexpressions)
ac = subexpression_substitution_in_main_assignments(ac)
for i in range(0, len(ac.main_assignments)):
assert ac.main_assignments[i].rhs == x
def test_add_subexpressions_for_divisions():
subexpressions = [
Assignment(s0, 2 / a + 2 / b),
Assignment(s1, 2 / a + 2 / b + 2 / c),
Assignment(s2, 2 / a + 2 / b + 2 / c + 2 / d),
Assignment(s3, 2 / a + 2 / b / c),
Assignment(x, s1 + s2 + s0 + s3)
]
main = [
Assignment(f[0], s1 + s2 + s0 + s3)
]
ac = AssignmentCollection(main, subexpressions)
divs_before_optimisation = ac.operation_count["divs"]
ac = add_subexpressions_for_divisions(ac)
divs_after_optimisation = ac.operation_count["divs"]
assert divs_before_optimisation - divs_after_optimisation == 8
rhs = []
for i in range(len(ac.subexpressions)):
rhs.append(ac.subexpressions[i].rhs)
assert 1/a in rhs
assert 1/b in rhs
assert 1/c in rhs
assert 1/d in rhs
def test_add_subexpressions_for_constants():
half = sp.Rational(1,2)
sqrt_2 = sp.sqrt(2)
main = [
Assignment(f[0], half * a + half * b + half * c),
Assignment(f[1], - half * a - half * b),
Assignment(f[2], a * sqrt_2 - b * sqrt_2),
Assignment(f[3], a**2 + b**2)
]
ac = AssignmentCollection(main)
ac = add_subexpressions_for_constants(ac)
assert len(ac.subexpressions) == 2
half_subexp = None
sqrt_subexp = None
for asm in ac.subexpressions:
if asm.rhs == half:
half_subexp = asm.lhs
elif asm.rhs == sqrt_2:
sqrt_subexp = asm.lhs
else:
pytest.fail(f"An unexpected subexpression was encountered: {asm}")
assert half_subexp is not None
assert sqrt_subexp is not None
for asm in ac.main_assignments[:3]:
assert isinstance(asm.rhs, sp.Mul)
assert any(arg == half_subexp for arg in ac.main_assignments[0].rhs.args)
assert any(arg == half_subexp for arg in ac.main_assignments[1].rhs.args)
assert any(arg == sqrt_subexp for arg in ac.main_assignments[2].rhs.args)
# Do not replace exponents!
assert ac.main_assignments[3].rhs == a**2 + b**2
def test_add_subexpressions_for_sums():
subexpressions = [
Assignment(s0, a + b + c + d),
Assignment(s1, 3 * a * sp.sqrt(x) + 4 * b + c),
Assignment(s2, 3 * a * sp.sqrt(x) + 4 * b + c),
Assignment(s3, 3 * a * sp.sqrt(x) + 4 * b + c)
]
main = [
Assignment(f[0], s1 + s2 + s0 + s3)
]
ac = AssignmentCollection(main, subexpressions)
ops_before_optimisation = ac.operation_count
ac = add_subexpressions_for_sums(ac)
ops_after_optimisation = ac.operation_count
assert ops_after_optimisation["adds"] == ops_before_optimisation["adds"]
assert ops_after_optimisation["muls"] < ops_before_optimisation["muls"]
assert ops_after_optimisation["sqrts"] < ops_before_optimisation["sqrts"]
rhs = []
for i in range(len(ac.subexpressions)):
rhs.append(ac.subexpressions[i].rhs)
assert a + b + c + d in rhs
assert 3 * a * sp.sqrt(x) in rhs
def test_add_subexpressions_for_field_reads():
s, v = fields("s(5), v(5): double[2D]")
subexpressions = []
main = [Assignment(s[0, 0](0), 3 * v[0, 0](0)),
Assignment(s[0, 0](1), 10 * v[0, 0](1))]
ac = AssignmentCollection(main, subexpressions)
assert len(ac.subexpressions) == 0
ac2 = add_subexpressions_for_field_reads(ac)
assert len(ac2.subexpressions) == 2
ac3 = add_subexpressions_for_field_reads(ac, data_type="float32")
assert len(ac3.subexpressions) == 2
assert isinstance(ac3.subexpressions[0].lhs, TypedSymbol)
assert ac3.subexpressions[0].lhs.dtype == BasicType("float32")
# added check for early out of add_subexpressions_for_field_reads is no fields appear on the rhs (See #92)
main = [Assignment(s[0, 0](0), 3.0),
Assignment(s[0, 0](1), 4.0)]
ac4 = AssignmentCollection(main, subexpressions)
assert len(ac4.subexpressions) == 0
ac5 = add_subexpressions_for_field_reads(ac4)
assert ac5 is not None
assert ac4 is ac5
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_sympy_optimizations(target, dtype):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
src, dst = ps.fields(f'src, dst: {dtype}[2d]')
assignments = ps.AssignmentCollection({
src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
})
config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
ast = ps.create_kernel(assignments, config=config)
ps.show_code(ast)
code = ps.get_code_str(ast)
if dtype == 'float32':
assert 'expf(' in code
elif dtype == 'float64':
assert 'exp(' in code
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('simplification', (True, False))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_evaluate_constant_terms(target, simplification):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
src, dst = ps.fields('src, dst: float32[2d]')
# cos of a number will always be simplified
assignments = ps.AssignmentCollection({
src[0, 0]: -sp.cos(1) + dst[0, 0]
})
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast)
assert 'cos(' not in code