Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 980 additions and 53 deletions
import pytest
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils.fast_approximation import fast_division
@pytest.mark.parametrize('dtype', ["float64", "float32"])
@pytest.mark.parametrize('func', [sp.Pow, sp.atan2])
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_two_arguments(dtype, func, target):
    """Generate, compile and run a kernel applying a two-argument function.

    The kernel computes ``x = func(y, z)`` with ``y == 1`` and ``z == 2``
    everywhere; the value in cell ``[0, 0]`` is compared against sympy's
    numerical evaluation of ``func(1.0, 2.0)``.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("cupy")
    dh = ps.create_data_handling(domain_size=(10, 10), periodicity=True, default_target=target)

    x = dh.add_array('x', values_per_cell=1, dtype=dtype)
    dh.fill("x", 0.0, ghost_layers=True)
    y = dh.add_array('y', values_per_cell=1, dtype=dtype)
    dh.fill("y", 1.0, ghost_layers=True)
    z = dh.add_array('z', values_per_cell=1, dtype=dtype)
    dh.fill("z", 2.0, ghost_layers=True)

    config = ps.CreateKernelConfig(target=target)

    # apply the binary function under test to the two input fields
    up = ps.Assignment(x.center, func(y.center, z.center))
    ast = ps.create_kernel(up, config=config)
    code = ps.get_code_str(ast)
    if dtype == 'float32':
        # the lower-cased sympy function name must show up in the generated code
        assert func.__name__.lower() in code
    kernel = ast.compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel)
    dh.all_to_cpu()

    # The third positional parameter of assert_allclose is ``rtol``; the former
    # values 13 / 5 were meant as a number of digits and made the check vacuous.
    rtol = 1e-13 if dtype == 'float64' else 1e-5
    np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()), rtol=rtol)
@pytest.mark.parametrize('dtype', ["float64", "float32"])
@pytest.mark.parametrize('func', [sp.sin, sp.cos, sp.sinh, sp.cosh, sp.atan, sp.floor, sp.ceiling])
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
def test_single_arguments(dtype, func, target):
    """Generate, compile and run a kernel applying a one-argument function.

    The kernel computes ``x = func(y)`` with ``y == 1`` everywhere and the
    value in cell ``[0, 0]`` is compared against sympy's evaluation of
    ``func(1.0)``.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("cupy")
    dh = ps.create_data_handling(domain_size=(10, 10), periodicity=True, default_target=target)

    x = dh.add_array('x', values_per_cell=1, dtype=dtype)
    dh.fill("x", 0.0, ghost_layers=True)
    y = dh.add_array('y', values_per_cell=1, dtype=dtype)
    dh.fill("y", 1.0, ghost_layers=True)

    kernel_config = ps.CreateKernelConfig(target=target)

    # apply the unary function under test to the input field
    update = ps.Assignment(x.center, func(y.center))
    ast = ps.create_kernel(update, config=kernel_config)
    code = ps.get_code_str(ast)
    if dtype == 'float32':
        # sympy calls it "ceiling", the generated code is checked for "ceil"
        func_name = "ceil" if func is sp.ceiling else func.__name__.lower()
        assert func_name in code
    kernel = ast.compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel)
    dh.all_to_cpu()

    tolerance = 10**-3 if dtype == 'float32' else 10**-5
    computed = dh.gather_array("x")[0, 0]
    expected = float(func(1.0).evalf())
    np.testing.assert_allclose(computed, expected, rtol=tolerance)
@pytest.mark.parametrize('a', [sp.Symbol('a'), ps.fields('a: float64[2d]').center])
def test_avoid_pow(a):
    """A squared symbol or field access must not be printed as a ``pow`` call."""
    x = ps.fields('x: float64[2d]')
    update = ps.Assignment(x.center_vector[0], 2 * a ** 2 / 3)
    generated = ps.get_code_str(ps.create_kernel(update))
    assert "pow" not in generated
def test_avoid_pow_fast_div():
    """A square inside ``fast_division`` must not be printed as ``pow`` in GPU code."""
    x = ps.fields('x: float64[2d]')
    a = ps.fields('a: float64[2d]').center

    update = ps.Assignment(x.center_vector[0], fast_division(1, (a**2)))
    gpu_config = ps.CreateKernelConfig(target=ps.Target.GPU)
    ast = ps.create_kernel(update, config=gpu_config)

    generated = ps.get_code_str(ast)
    assert "pow" not in generated
def test_avoid_pow_move_constants():
    """Squares must not turn into ``pow`` after constants are moved out of the loop.

    At the end of kernel creation ``move_constants_before_loop`` is called.
    It also substitutes symbols that carry the same value, which simplifies
    the equations once more.
    """
    x = ps.fields('x: float64[2d]')
    a, b, c = sp.symbols("a, b, c")

    rhs = a**2/18 - a*b/6 - a/18 + b**2/18 + b/18 - c**2/36
    assignments = [ps.Assignment(symbol, 0.0) for symbol in (a, b, c)]
    assignments.append(ps.Assignment(x.center_vector[0], rhs))

    ast = ps.create_kernel(assignments)
    generated = ps.get_code_str(ast)
    ps.show_code(ast)
    assert "pow" not in generated
import pytest
import numpy as np
import sympy as sp
import pystencils as ps
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
# Values for the ``iteration_slice`` parameter of ``test_mod``.
# ``False`` means "no iteration slice": the test then wraps the assignment in a
# Conditional on the loop counters instead. The other entries are strided 2D
# slices with different start offsets and step widths.
SLICE_LIST = [False,
              ps.make_slice[1:-1:2, 1:-1:2],
              ps.make_slice[2:-1:2, 4:-1:7],
              ps.make_slice[4:-1:2, 5:-1:2],
              ps.make_slice[3:-1:4, 7:-1:3]]
@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
@pytest.mark.parametrize('iteration_slice', SLICE_LIST)
def test_mod(target, iteration_slice):
    """Write 1.0 into a strided subset of a field and check those cells.

    With a real ``iteration_slice`` the kernel is restricted via the kernel
    config. With ``iteration_slice=False`` the assignment is wrapped in a
    Conditional requiring ``loop_counter % 2 == 1`` in every dimension.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("cupy")
    dh = ps.create_data_handling(domain_size=(51, 51), periodicity=True, default_target=target)

    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
    cond = [sp.Eq(sp.Mod(ctr, 2), 1) for ctr in loop_ctrs]

    field = dh.add_array("a", values_per_cell=1)

    eq_list = [SympyAssignment(field.center, 1.0)]

    if iteration_slice:
        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice)
        assign = eq_list
        written_cells = iteration_slice
    else:
        assign = [Conditional(sp.And(*cond), Block(eq_list))]
        config = ps.CreateKernelConfig(target=dh.default_target)
        # Indexing the result with ``False`` selects nothing, which made the
        # final assertion vacuous for this case. Check the cells with odd
        # coordinates instead.
        # NOTE(review): assumes loop counters index the array including its
        # ghost layers, so odd counters match [1:-1:2, 1:-1:2] - confirm.
        written_cells = ps.make_slice[1:-1:2, 1:-1:2]

    kernel = ps.create_kernel(assign, config=config).compile()

    on_gpu = config.target == ps.enums.Target.GPU
    dh.fill(field.name, 0, ghost_layers=True)

    if on_gpu:
        dh.to_gpu(field.name)

    dh.run_kernel(kernel)

    if on_gpu:
        dh.to_cpu(field.name)

    result = dh.gather_array(field.name, ghost_layers=True)

    assert np.all(result[written_cells] == 1.0)
......@@ -25,7 +25,40 @@ def test_symbol_renaming():
loops = block.atoms(LoopOverCoordinate)
assert len(loops) == 2
assert len(block.args[1].body.args) == 1
assert len(block.args[3].body.args) == 2
for loop in loops:
assert len(loop.body.args) == 1
assert len(loop.parent.args) == 4 # 2 loops + 2 subexpressions
assert loop.parent.args[0].lhs.name != loop.parent.args[1].lhs.name
assert loop.parent.args[0].lhs.name != loop.parent.args[2].lhs.name
def test_keep_order_of_accesses():
    """Field reads and writes must keep their order when a loop is cut.

    Every iteration reads ``f[0]`` into ``x`` and writes ``2 * x`` to ``f[1]``
    (relative accesses), so starting from all ones the array has to end up
    holding consecutive powers of two.
    """
    f = ps.fields("f: [1D]")
    x = TypedSymbol("x", np.float64)
    n = 5

    loop_body = Block([SympyAssignment(x, f[0]),
                       SympyAssignment(f[1], 2 * x)])
    loop = LoopOverCoordinate(loop_body, 0, 0, n)
    block = Block([loop])

    ps.transformations.resolve_field_accesses(block)
    # split off the last iteration, then hoist constants out of the second part
    cut_result = ps.transformations.cut_loop(loop, [n - 1])
    ps.transformations.move_constants_before_loop(cut_result.args[1])

    kernel_func = ps.astnodes.KernelFunction(
        block, ps.Target.CPU, ps.Backend.C, ps.cpu.cpujit.make_python_function, None
    )
    kernel = kernel_func.compile()

    print(ps.show_code(kernel_func))

    f_arr = np.ones(n + 1)
    kernel(f=f_arr)

    print(f_arr)
    expected = np.array([1, 2, 4, 8, 16, 32])
    assert np.allclose(f_arr, expected)
import sympy as sp
from pystencils import AssignmentCollection, Assignment
from pystencils.node_collection import NodeCollection
from pystencils.astnodes import SympyAssignment
def test_node_collection_from_assignment_collection():
    """A NodeCollection built from an AssignmentCollection holds SympyAssignments."""
    x = sp.symbols('x')
    source = AssignmentCollection([Assignment(x, 2)])

    converted = NodeCollection.from_assignment_collection(source)
    first_node = converted.all_assignments[0]

    assert first_node == SympyAssignment(x, 2)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('cupy')
```
%% Output
<module 'cupy' from '/home/markus/Python311/lib/python3.11/site-packages/cupy/__init__.py'>
%% Cell type:code id: tags:
``` python
from pystencils.session import *
sp.init_printing()
frac = sp.Rational
```
%% Cell type:markdown id: tags:
# Phase-field simulation of dendritic solidification in 3D
This notebook tests the model presented in the dendritic growth tutorial in 3D.
%% Cell type:code id: tags:
``` python
# Run on the GPU; ``gpu`` keeps that choice as a plain boolean.
target = ps.Target.GPU
gpu = target == ps.Target.GPU
# A small domain is used when ``is_test_run`` is defined in the notebook's globals.
domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
# Arrays 'phi' and 'T' each have a '*_tmp' partner that the time loop swaps them with;
# 'phidelta' receives the discretized dφ/dt.
φ_field = dh.add_array('phi', latex_name='φ')
φ_field_tmp = dh.add_array('phi_tmp', latex_name='φ_tmp')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
t_field_tmp = dh.add_array('T_tmp')
```
%% Cell type:code id: tags:
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
φ = φ_field.center
T = t_field.center
d = ps.fd.Diff
def f(φ, m):
    """Return the polynomial in φ used as bulk free energy density; m shifts its coefficients."""
    quartic_term = φ**4 / 4
    cubic_term = (frac(1, 2) - m/3) * φ**3
    quadratic_term = (frac(1,4)-m/2)*φ**2
    return quartic_term - cubic_term + quadratic_term
bulk_free_energy_density = f(φ, m)
interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
```
%% Cell type:markdown id: tags:
Here comes the major change that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which cannot be computed simply via atan() as in the 2D case.
%% Cell type:code id: tags:
``` python
n = sp.Matrix([d(φ, i) for i in range(3)])
nLen = sp.sqrt(sum(n_i**2 for n_i in n))
n = n / nLen
nVal = sum(n_i**4 for n_i in n)
σ = δ * nVal
εVal = εb * (1 + σ)
εVal
```
%% Output
$\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}} + \frac{{\partial_{1} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}} + \frac{{\partial_{2} {φ}_{(0,0,0)}}^{4}}{\left({\partial_{0} {φ}_{(0,0,0)}}^{2} + {\partial_{1} {φ}_{(0,0,0)}}^{2} + {\partial_{2} {φ}_{(0,0,0)}}^{2}\right)^{2}}\right) + 1\right)$
⎛ ⎛ 4
⎜ ⎜ D(φ[0,0,0])
\bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
⎜ ⎜ 2
⎜ ⎜⎛ 2 2 2⎞ ⎛
⎝ ⎝⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]
4 4
D(φ[0,0,0]) D(φ[0,0,0])
────────────────────────────────── + ─────────────────────────────────────────
2
2 2 2⎞ ⎛ 2 2
) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]
⎞ ⎞
⎟ ⎟
────⎟ + 1⎟
2⎟ ⎟
2⎞ ⎟ ⎟
) ⎠ ⎠ ⎠
%% Cell type:code id: tags:
``` python
def m_func(temperature):
    """Return ``(α / π) * atan(γ * (Teq - temperature))`` as a sympy expression."""
    scaled_offset = γ * (Teq - temperature)
    prefactor = α / sp.pi
    return prefactor * sp.atan(scaled_offset)
```
%% Cell type:code id: tags:
``` python
substitutions = {m: m_func(T),
ε: εVal}
fe_i = interface_free_energy_density.subs(substitutions)
fe_b = bulk_free_energy_density.subs(substitutions)
μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
```
%% Cell type:code id: tags:
``` python
dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
```
%% Cell type:code id: tags:
``` python
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.3,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
%% Output
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.3, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
%% Cell type:code id: tags:
``` python
dφ_dt = - dF_dφ / τ
assignments = [
ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
]
φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
φEqs.append(ps.Assignment(φ_field_tmp.center, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperatureEqs = [
ps.Assignment(t_field_tmp.center, discretize(temperatureEvolution.subs(parameters)))
]
```
%% Cell type:code id: tags:
``` python
φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
```
%% Cell type:code id: tags:
``` python
def time_loop(steps):
    """Advance the simulation by ``steps`` time steps.

    All arrays are transferred to the GPU first and back to the CPU afterwards.
    Each step synchronizes and updates the phase field, then does the same for
    the temperature, and finally swaps both fields with their temporaries.
    """
    sync_phase = dh.synchronization_function(['phi'], target=target)
    sync_temperature = dh.synchronization_function(['T'], target=target)

    dh.all_to_gpu()
    step = 0
    while step < steps:
        sync_phase()
        dh.run_kernel(φ_kernel)
        sync_temperature()
        dh.run_kernel(temperatureKernel)

        dh.swap(φ_field.name, φ_field_tmp.name)
        dh.swap(t_field.name, t_field_tmp.name)
        step += 1
    dh.all_to_cpu()
def init(nucleus_size=np.sqrt(5)):
    """Reset the fields: 'phi' is 1 inside a centred sphere and 0 elsewhere, 'T' is 0.

    ``nucleus_size`` is the radius of that sphere in cells.
    """
    for block in dh.iterate():
        idx_x, idx_y, idx_z = block.cell_index_arrays
        # shift the coordinates so the origin sits at the block centre
        off_x = idx_x - block.shape[0] // 2
        off_y = idx_y - block.shape[1] // 2
        off_z = idx_z - block.shape[2] // 2
        inside_nucleus = (off_x ** 2 + off_y ** 2 + off_z ** 2) < nucleus_size ** 2

        block['phi'].fill(0)
        block['phi'][inside_nucleus] = 1.0
        block['T'].fill(0.0)
def plot(slice_obj=ps.make_slice[:, :, 0.5]):
    """Show 'phi', 'T' and 'phidelta' on ``slice_obj`` in three side-by-side panels."""
    # first panel: phase field
    plt.subplot(1, 3, 1)
    phase = dh.gather_array('phi', slice_obj).squeeze()
    plt.scalar_field(phase)
    plt.title("φ")
    plt.colorbar()

    # second panel: temperature
    plt.subplot(1, 3, 2)
    plt.title("T")
    temperature = dh.gather_array('T', slice_obj).squeeze()
    plt.scalar_field(temperature)
    plt.colorbar()

    # third panel: phase-field update
    plt.subplot(1, 3, 3)
    plt.title("∂φ")
    phase_update = dh.gather_array('phidelta', slice_obj).squeeze()
    plt.scalar_field(phase_update)
    plt.colorbar()
```
%% Cell type:code id: tags:
``` python
init()
plot()
print(dh)
```
%% Output
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
T_tmp| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phi_tmp| ( 0, 0)| ( 0, 0)
phidelta| ( 0, 0)| ( 0, 0)
%% Cell type:code id: tags:
``` python
if 'is_test_run' in globals():
time_loop(2)
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
assert np.isfinite(dh.max('phidelta'))
else:
from time import perf_counter
vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
last = perf_counter()
for i in range(4):
time_loop(100)
vtk_writer(i)
print("Step ", i, perf_counter() - last, dh.max('phi'))
last = perf_counter()
```
%% Output
Step 0 19.713090835999992 1.0
Step 1 19.673075279000045 1.0
Step 2 19.696444219 1.0
Step 3 19.752472744999977 1.0
from copy import copy, deepcopy
from pystencils.field import Field
from pystencils.data_types import TypedSymbol
from pystencils.typing import TypedSymbol
def test_field_access():
......
File moved
import pytest
import re
import sympy as sp
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
    """Bare AST node subclass used to provoke the C backend's unsupported-node error."""

    def __init__(self):
        super(UnsupportedNode, self).__init__()
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_infinity(type, negative, target):
    """A kernel assigning plus or minus infinity to a field must compile.

    The parameter name ``type`` shadows the builtin but is kept because it is
    part of the parametrisation (and therefore of the test ids).
    """
    # Skip before any GPU kernel is created, as the other tests do.
    if target == pystencils.Target.GPU:
        pytest.importorskip('cupy')

    x = pystencils.fields(f'x: {type}[1d]')

    value = -sp.oo if negative else sp.oo
    assignment = pystencils.Assignment(x.center, value)
    ast = pystencils.create_kernel(assignment, data_type=type, target=target)

    # compile once and reuse the result for printing
    kernel = ast.compile()

    print(kernel.code)
def test_print_unsupported_node():
    """Printing an unknown node type must raise NotImplementedError with a clear message."""
    expected_message = 'CBackend does not support node of type UnsupportedNode'
    with pytest.raises(NotImplementedError, match=expected_message):
        CBackend()(UnsupportedNode())
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_subtraction(dtype, target):
    """Negated terms must not be printed with a literal ``-1.0`` in the code."""
    a, b, c = sp.symbols("a b c")

    x = pystencils.fields(f'x: {dtype}[3d]')
    y = pystencils.fields(f'y: {dtype}[3d]')

    rhs = y.center - a * b ** 8 + b * -1 / 42.0 - 2 * c ** 4
    kernel_config = pystencils.CreateKernelConfig(target=target, data_type=dtype)
    ast = pystencils.create_kernel(pystencils.Assignment(x.center, rhs), config=kernel_config)

    generated = pystencils.get_code_str(ast)
    assert "-1.0" not in generated
def test_print_small_integer_pow():
    """Check how the C backend prints powers, divisions and casts.

    Each right-hand side is paired with the regular expression its printed
    form has to match.
    """
    printer = pystencils.backends.cbackend.CBackend()

    x = sp.Symbol("x")
    y = sp.Symbol("y")
    n = pystencils.TypedSymbol("n", "int")
    t = pystencils.TypedSymbol("t", "float32")
    s = pystencils.TypedSymbol("s", "float32")

    make_assignment = pystencils.astnodes.SympyAssignment
    cast = pystencils.typing.cast_functions.CastFunc

    # (right-hand side, pattern expected in the printed assignment)
    cases = [
        (1/x, r"1\.0\s*/\s*\(?\s*x\s*\)?"),
        (x*x, r"x\s*\*\s*x"),
        (1/(x*x), r"1\.0\s*/\s*\(\s*x\s*\*x\s*\)"),
        (x**8, r"x(\s*\*\s*x){7}"),
        (x**(-8), r"1\.0\s*/\s*\(\s*x(\s*\*\s*x){7}\s*\)"),
        (x**9, r"pow\(\s*x\s*,\s*9(\.0)?\s*\)"),
        (x**(-9), r"pow\(\s*x\s*,\s*-9(\.0)?\s*\)"),
        (x**n, r"pow\(\s*x\s*,\s*\(?\s*\(\s*double\s*\)\s*\(\s*n\s*\)\s*\)?\s*\)"),
        (sp.Pow(4, 4, evaluate=False), r"\(\s*int[a-zA-Z0-9_]*\s*\)\s*\(+\s*4(\s*\*\s*4){3}\s*\)+"),
        (x**0.25, r"pow\(\s*x\s*,\s*0\.25\s*\)"),
        (x**y, r"pow\(\s*x\s*,\s*y\s*\)"),
        (cast(1/x, "float32"), r"\(\s*float\s*\)[ ()]*1\.0\s*/\s*\(?\s*x\s*\)?"),
        (cast(x*x, "float32"), r"\(\s*float\s*\)[ ()]*x\s*\*\s*x"),
        ((t+s)**(-8),
         r"\(\s*float\s*\)\s*\(\s*1\.0f\s*/\s*\(\s*\(\s*s\s*\+\s*t\s*\)(\s*\*\s*\(\s*s\s*\+\s*t\s*\)){7}\s*\)"),
        ((t+s)**(-9), r"powf\(\s*s\s*\+\s*t\s*,\s*-9\.0f\s*\)"),
    ]

    equs = [make_assignment(y, rhs) for rhs, _ in cases]
    regexes = [pattern for _, pattern in cases]

    typed = pystencils.typing.transformations.add_types(equs, pystencils.CreateKernelConfig())

    for pattern, node in zip(regexes, typed):
        assert re.search(pattern, printer(node))
import numpy as np
import pystencils as ps
from pystencils.cpu.vectorization import get_supported_instruction_sets
from pystencils.cpu.vectorization import replace_inner_stride_with_one, vectorize
def test_basic_kernel():
    """Run a Jacobi averaging kernel in 2D and 3D and exercise sequence reductions.

    The source field ``f`` is filled with 42 including its ghost layer, so
    the neighbour average written to ``tmp`` has to be 42 as well.
    """
    for domain_shape in [(4, 5), (3, 4, 5)]:
        dh = ps.create_data_handling(domain_size=domain_shape, periodicity=True)
        assert all(dh.periodicity)

        f = dh.add_array('f', values_per_cell=1)
        tmp = dh.add_array('tmp', values_per_cell=1)

        stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
        stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
        stencil = stencil_2d if dh.dim == 2 else stencil_3d

        jacobi = ps.Assignment(tmp.center, sum(f.neighbors(stencil)) / len(stencil))
        kernel = ps.create_kernel(jacobi).compile()

        for b in dh.iterate(ghost_layers=1):
            b['f'].fill(42)
        dh.run_kernel(kernel)
        for b in dh.iterate(ghost_layers=0):
            # the kernel must leave its input untouched ...
            np.testing.assert_equal(b['f'], 42)
            # ... and the output, which was not checked before, must hold the
            # average of the constant input
            np.testing.assert_allclose(b['tmp'], 42)

        float_seq = [1.0, 2.0, 3.0, 4.0]
        int_seq = [1, 2, 3]
        # every reduction is expected to hand back the sequence unchanged here
        for op in ('min', 'max', 'sum'):
            assert (dh.reduce_float_sequence(float_seq, op) == float_seq).all()
            assert (dh.reduce_int_sequence(int_seq, op) == int_seq).all()
def test_basic_blocking_staggered():
    """A staggered kernel with CPU blocking must match the kernel without blocking."""
    f = ps.fields("f: double[2D]")
    stag = ps.fields("stag(2): double[2D]", field_type=ps.FieldType.STAGGERED)

    # backward differences in x and y direction
    terms = [
        f[0, 0] - f[-1, 0],
        f[0, 0] - f[0, -1],
    ]
    assignments = []
    for i, d in enumerate(stag.staggered_stencil):
        assignments.append(ps.Assignment(stag.staggered_access(d), terms[i]))

    kernel = ps.create_staggered_kernel(assignments, cpu_blocking=(3, 16)).compile()
    reference_kernel = ps.create_staggered_kernel(assignments).compile()

    cells = (80, 33)
    f_arr = np.random.rand(*cells)
    stag_arr = np.zeros(cells + (3,))
    stag_ref = np.zeros(cells + (3,))

    kernel(f=f_arr, stag=stag_arr)
    reference_kernel(f=f_arr, stag=stag_ref)

    np.testing.assert_almost_equal(stag_arr, stag_ref)
def test_basic_vectorization():
    """Vectorize a five-point sum kernel and check its result on a constant field."""
    available_sets = get_supported_instruction_sets()
    # use the last reported instruction set, or None when none was detected
    instruction_set = available_sets[-1] if available_sets else None

    f, g = ps.fields("f, g : double[2D]")
    stencil_sum = f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0
    update_rule = [ps.Assignment(g[0, 0], stencil_sum)]

    ast = ps.create_kernel(update_rule)
    replace_inner_stride_with_one(ast)
    vectorize(ast, instruction_set=instruction_set)
    func = ast.compile()

    # 23 x 17 inner cells plus one ghost layer on each side, all set to 5
    arr = np.full((23 + 2, 17 + 2), 5.0)
    dst = np.zeros_like(arr)

    func(g=dst, f=arr)
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
\ No newline at end of file
......@@ -3,10 +3,12 @@ import numpy as np
import pytest
import pystencils as ps
from pystencils.astnodes import SympyAssignment
from pystencils.node_collection import NodeCollection
from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles, random_symbol
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.cpujit import get_compiler_config
from pystencils.data_types import TypedSymbol
from pystencils.typing import TypedSymbol
from pystencils.enums import Target
RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles,
......@@ -20,16 +22,17 @@ if get_compiler_config()['os'] == 'windows':
instruction_sets.remove('avx')
if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512')
if 'avx512vl' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512vl')
@pytest.mark.parametrize('target,rng', (
(Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox')))
@pytest.mark.parametrize('target, rng', ((Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox')))
@pytest.mark.parametrize('precision', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
if target == Target.GPU:
pytest.importorskip('pycuda')
if instruction_sets and {'neon', 'sve', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
pytest.importorskip('cupy')
if instruction_sets and {'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
if rng == 'aesni' and len(keys) == 2:
keys *= 2
......@@ -42,7 +45,7 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0),
dh.fill(f.name, 42.0)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets, keys=keys)
assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
......@@ -119,7 +122,7 @@ def test_rng_offsets(kind, vectorized):
@pytest.mark.parametrize('rng', ('philox', 'aesni'))
@pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
if (target in ['neon', 'vsx', 'rvv', 'sme'] or target.startswith('sve')) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}
......@@ -130,7 +133,7 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke
ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [ps.Assignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
assignments = [rng_node] + [SympyAssignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
kwargs = {'time_step': t}
......@@ -139,7 +142,7 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke
dh.run_kernel(kernel, **kwargs)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
dh.run_kernel(kernel, **kwargs)
......@@ -153,24 +156,25 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke
@pytest.mark.parametrize('vectorized', (False, True))
def test_rng_symbol(vectorized):
"""Make sure that the RNG symbol generator generates symbols and that the resulting code compiles"""
cpu_vectorize_info = None
if vectorized:
if not instruction_sets:
pytest.skip("cannot detect CPU instruction set")
else:
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True,
'instruction_set': instruction_sets[-1]}
else:
cpu_vectorize_info = None
dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
ac = ps.AssignmentCollection([ps.Assignment(f(i), 0) for i in range(f.shape[-1])])
rng_symbol_gen = random_symbol(ac.subexpressions, dim=dh.dim)
nc = NodeCollection([SympyAssignment(f(i), 0) for i in range(f.shape[-1])])
subexpressions = []
rng_symbol_gen = random_symbol(subexpressions, dim=dh.dim)
for i in range(f.shape[-1]):
ac.main_assignments[i] = ps.Assignment(ac.main_assignments[i].lhs, next(rng_symbol_gen))
symbols = [a.rhs for a in ac.main_assignments]
nc.all_assignments[i] = SympyAssignment(nc.all_assignments[i].lhs, next(rng_symbol_gen))
symbols = [a.rhs for a in nc.all_assignments]
[nc.all_assignments.insert(0, subexpression) for subexpression in subexpressions]
assert len(symbols) == f.shape[-1] and len(set(symbols)) == f.shape[-1]
ps.create_kernel(ac, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
ps.create_kernel(nc, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
@pytest.mark.parametrize('vectorized', (False, True))
......
......@@ -71,6 +71,7 @@ def test_split_inner_loop():
ast = ps.create_kernel(ac)
code = ps.get_code_str(ast)
ps.show_code(ast)
# we have four inner loops as indicated in split groups (4 elements) plus one outer loop
assert code.count('for') == 5
ast = ps.create_kernel(ac, target=ps.Target.GPU)
......
from sys import version_info as vs
import pytest
import pystencils.config
import sympy as sp
import pystencils as ps
from pystencils import Assignment, AssignmentCollection, fields
from pystencils.simp import subexpression_substitution_in_main_assignments
from pystencils.simp import add_subexpressions_for_divisions
from pystencils.simp import add_subexpressions_for_sums
from pystencils.simp import add_subexpressions_for_field_reads
from pystencils.simp.simplifications import add_subexpressions_for_constants
from pystencils import Assignment, AssignmentCollection, fields
from pystencils.typing import BasicType, TypedSymbol
a, b, c, d, x, y, z = sp.symbols("a b c d x y z")
s0, s1, s2, s3 = sp.symbols("s_:4")
......@@ -130,40 +133,52 @@ def test_add_subexpressions_for_sums():
def test_add_subexpressions_for_field_reads():
s, v = fields("s(5), v(5): double[2D]")
subexpressions = []
main = [
Assignment(s[0, 0](0), 3 * v[0, 0](0)),
Assignment(s[0, 0](1), 10 * v[0, 0](1))
]
main = [Assignment(s[0, 0](0), 3 * v[0, 0](0)),
Assignment(s[0, 0](1), 10 * v[0, 0](1))]
ac = AssignmentCollection(main, subexpressions)
assert len(ac.subexpressions) == 0
ac = add_subexpressions_for_field_reads(ac)
assert len(ac.subexpressions) == 2
ac2 = add_subexpressions_for_field_reads(ac)
assert len(ac2.subexpressions) == 2
ac3 = add_subexpressions_for_field_reads(ac, data_type="float32")
assert len(ac3.subexpressions) == 2
assert isinstance(ac3.subexpressions[0].lhs, TypedSymbol)
assert ac3.subexpressions[0].lhs.dtype == BasicType("float32")
# added check for early out of add_subexpressions_for_field_reads is no fields appear on the rhs (See #92)
main = [Assignment(s[0, 0](0), 3.0),
Assignment(s[0, 0](1), 4.0)]
ac4 = AssignmentCollection(main, subexpressions)
assert len(ac4.subexpressions) == 0
ac5 = add_subexpressions_for_field_reads(ac4)
assert ac5 is not None
assert ac4 is ac5
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('simplification', (True, False))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_sympy_optimizations(target, simplification):
def test_sympy_optimizations(target, dtype):
if target == ps.Target.GPU:
pytest.importorskip("pycuda")
src, dst = ps.fields('src, dst: float32[2d]')
pytest.importorskip("cupy")
src, dst = ps.fields(f'src, dst: {dtype}[2d]')
# Triggers Sympy's expm1 optimization
# Sympy's expm1 optimization is tedious to use and the behaviour is highly depended on the sympy version. In
# some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
# for sympy to work properly ...
assignments = ps.AssignmentCollection({
src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
})
config = ps.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
ast = ps.create_kernel(assignments, config=config)
ps.show_code(ast)
code = ps.get_code_str(ast)
if simplification:
assert 'expm1(' in code
else:
assert 'expm1(' not in code
if dtype == 'float32':
assert 'expf(' in code
elif dtype == 'float64':
assert 'exp(' in code
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
......@@ -171,19 +186,15 @@ def test_sympy_optimizations(target, simplification):
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_evaluate_constant_terms(target, simplification):
if target == ps.Target.GPU:
pytest.importorskip("pycuda")
pytest.importorskip("cupy")
src, dst = ps.fields('src, dst: float32[2d]')
# Triggers Sympy's cos optimization
# cos of a number will always be simplified
assignments = ps.AssignmentCollection({
src[0, 0]: -sp.cos(1) + dst[0, 0]
})
config = ps.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast)
if simplification:
assert 'cos(' not in code
else:
assert 'cos(' in code
print(code)
assert 'cos(' not in code
import numpy as np
import pytest
import pystencils
import sympy as sp
from pystencils import Assignment, Field, create_kernel, fields
......@@ -104,13 +106,20 @@ def test_loop_independence_checks():
Assignment(g[0, 0], f[1, 0])])
assert 'Field g is written at two different locations' in str(e.value)
# This is allowed - because only one element of g is accessed
# This is not allowed - because this is not SSA (it can be overwritten with allow_double_writes)
with pytest.raises(ValueError) as e:
create_kernel([Assignment(g[0, 2], f[0, 1]),
Assignment(g[0, 2], 2 * g[0, 2])])
# This is allowed - because allow_double_writes is True now
create_kernel([Assignment(g[0, 2], f[0, 1]),
Assignment(g[0, 2], 2 * g[0, 2])])
Assignment(g[0, 2], 2 * g[0, 2])],
config=pystencils.CreateKernelConfig(allow_double_writes=True))
create_kernel([Assignment(v[0, 2](1), f[0, 1]),
Assignment(v[0, 1](0), 4),
Assignment(v[0, 2](1), 2 * v[0, 2](1))])
with pytest.raises(ValueError) as e:
create_kernel([Assignment(v[0, 2](1), f[0, 1]),
Assignment(v[0, 1](0), 4),
Assignment(v[0, 2](1), 2 * v[0, 2](1))])
with pytest.raises(ValueError) as e:
create_kernel([Assignment(g[0, 1], 3),
......
import numpy as np
import sympy as sp
import pytest
from pystencils import (
Assignment,
Field,
TypedSymbol,
create_kernel,
make_slice,
Target,
create_data_handling,
)
from pystencils.simp import sympy_cse_on_assignment_list
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_sliced_iteration(target):
    """Run a stencil kernel on a fixed iteration slice; only those cells may change."""
    on_gpu = target == Target.GPU
    if on_gpu:
        pytest.importorskip("cupy")

    size = (4, 4)
    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)

    src_field = dh.add_array("src", 1)
    dst_field = dh.add_array("dst", 1)

    dh.fill(src_field.name, 1.0, ghost_layers=True)
    dh.fill(dst_field.name, 0.0, ghost_layers=True)

    a, b = sp.symbols("a b")
    weighted_neighbours = (
        a * src_field[0, 1]
        + a * src_field[0, -1]
        + b * src_field[1, 0]
        + b * src_field[-1, 0]
    )
    update_rule = Assignment(dst_field[0, 0], weighted_neighbours / 4)

    s = make_slice[1:3, 1]
    simplified = sympy_cse_on_assignment_list([update_rule])
    kernel = create_kernel(simplified, iteration_slice=s, target=target).compile()

    if on_gpu:
        dh.all_to_gpu()

    dh.run_kernel(kernel, a=1.0, b=1.0)

    if on_gpu:
        dh.all_to_cpu()

    # with a == b == 1 and src == 1 every updated cell becomes 1
    expected_result = np.zeros(size)
    expected_result[1:3, 1] = 1

    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_symbols_in_slice(target):
    """Check an iteration slice whose upper bound is a symbol supplied at run time.

    Same setup as a fixed-slice run (source filled with 1, destination with
    0, ``a = b = 1``), but the slice end is the integer symbol ``x_end``
    whose value is passed as a kernel argument. Afterwards only the cells
    ``[1:x_end_value, 1]`` of the destination must be 1.
    """
    on_gpu = target == Target.GPU
    if on_gpu:
        # pytest.xfail raises immediately, so nothing below runs for the GPU target.
        pytest.xfail("Iteration slices including arbitrary symbols are currently broken on GPU")

    size = (4, 4)
    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
    src_field = dh.add_array("src", 1)
    dst_field = dh.add_array("dst", 1)
    for field, initial_value in ((src_field, 1.0), (dst_field, 0.0)):
        dh.fill(field.name, initial_value, ghost_layers=True)

    a, b = sp.symbols("a b")
    weighted_neighbours = (
        a * src_field[0, 1]
        + a * src_field[0, -1]
        + b * src_field[1, 0]
        + b * src_field[-1, 0]
    )
    update_rule = Assignment(dst_field[0, 0], weighted_neighbours / 4)

    x_end = TypedSymbol("x_end", "int")
    s = make_slice[1:x_end, 1]
    # NOTE(review): the bound is derived from size[1] although x_end limits the
    # first slice axis; this is equivalent only because the domain is square.
    x_end_value = size[1] - 1

    assignments = sympy_cse_on_assignment_list([update_rule])
    ast = create_kernel(assignments, iteration_slice=s, target=target)
    kernel = ast.compile()

    if on_gpu:
        dh.all_to_gpu()
    dh.run_kernel(kernel, a=1.0, b=1.0, x_end=x_end_value)
    if on_gpu:
        dh.all_to_cpu()

    # Only the cells covered by the iteration slice may have been updated.
    expected_result = np.zeros(size)
    expected_result[1:x_end_value, 1] = 1
    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('waLBerla')
```
%% Output
<module 'waLBerla' from '/Users/holzer/walberla/python/waLBerla/__init__.py'>
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from time import perf_counter
from statistics import median
from functools import partial
```
%% Cell type:markdown id: tags:
## Benchmark for Python call overhead
%% Cell type:code id: tags:
``` python
# Repeat counts for the benchmark: 100 inner and 5 outer repetitions.
inner_repeats, outer_repeats = 100, 5
# Powers of two from 2**1 up to 2**7.
sizes = [1 << exponent for exponent in range(1, 8)]
sizes
```
%% Output
$\displaystyle \left[ 2, \ 4, \ 8, \ 16, \ 32, \ 64, \ 128\right]$
[2, 4, 8, 16, 32, 64, 128]
%% Cell type:code id: tags:
``` python
def benchmark_pure(domain_size, extract_first=False):
    """Time a compiled copy kernel that is called directly on numpy arrays.

    Args:
        domain_size: shape passed to ``np.zeros`` for the source array; the
            destination array is created with the same shape.
        extract_first: if True, the ``kernel`` attribute of the compiled
            kernel object is looked up once before timing starts and that
            attribute is called in the loop; otherwise the compiled kernel
            object itself is called.

    Returns:
        The elapsed ``perf_counter`` time of the whole loop divided by
        ``inner_repeats``, i.e. the average time of one iteration (kernel
        call plus array swap).
    """
    src = np.zeros(domain_size)
    dst = np.zeros_like(src)
    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()

    # The two benchmark variants differ only in which callable is timed, so
    # the callable is selected up front and a single timing loop is shared.
    if extract_first:
        kernel = kernel.kernel

    start = perf_counter()
    for _ in range(inner_repeats):
        kernel(src=src, dst=dst)
        src, dst = dst, src
    end = perf_counter()
    return (end - start) / inner_repeats
def benchmark_datahandling(domain_size, parallel=False):
    """Time a compiled copy kernel that is launched through a data handling.

    Args:
        domain_size: forwarded to ``ps.create_data_handling``.
        parallel: forwarded to ``ps.create_data_handling`` as ``parallel``.

    Returns:
        The elapsed ``perf_counter`` time of the whole loop divided by
        ``inner_repeats``, i.e. the average time of one iteration
        (``run_kernel`` plus ``swap``).
    """
    data_handling = ps.create_data_handling(domain_size, parallel=parallel)
    source = data_handling.add_array('src')
    destination = data_handling.add_array('dst')
    copy_rule = ps.Assignment(destination.center, source.center)
    compiled_kernel = ps.create_kernel(copy_rule).compile()

    t_begin = perf_counter()
    for _ in range(inner_repeats):
        data_handling.run_kernel(compiled_kernel)
        data_handling.swap('src', 'dst')
    t_end = perf_counter()
    return (t_end - t_begin) / inner_repeats
# Benchmark variants by name; each value takes only the domain size.
name_to_func = dict(
    pure_extract=partial(benchmark_pure, extract_first=True),
    pure_no_extract=partial(benchmark_pure, extract_first=False),
    dh_serial=partial(benchmark_datahandling, parallel=False),
    dh_parallel=partial(benchmark_datahandling, parallel=True),
)
```
%% Cell type:code id: tags:
``` python
# Column-oriented result table: one entry per (size, variant, repetition).
result = {'block_size': [], 'name': [], 'time': []}

for bs in sizes:
    print("Computing size ", bs)
    domain = (bs, bs)
    for name, func in name_to_func.items():
        for _ in range(outer_repeats):
            elapsed = func(domain)
            # Append in the same column order as the table was declared.
            for column, value in (('block_size', bs), ('name', name), ('time', elapsed)):
                result[column].append(value)
```
%% Output
Computing size 2
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/07/0d7kq8fd0sx24cs53zz90_qc0000gp/T/ipykernel_12649/2009975470.py in <module>
7 for name, func in name_to_func.items():
8 for i in range(outer_repeats):
----> 9 time = func((bs, bs))
10 result['block_size'].append(bs)
11 result['name'].append(name)
/var/folders/07/0d7kq8fd0sx24cs53zz90_qc0000gp/T/ipykernel_12649/3509370390.py in benchmark_datahandling(domain_size, parallel)
20
21 def benchmark_datahandling(domain_size, parallel=False):
---> 22 dh = ps.create_data_handling(domain_size, parallel=parallel)
23 f_src = dh.add_array('src')
24 f_dst = dh.add_array('dst')
~/pystencils/pystencils/pystencils/datahandling/__init__.py in create_data_handling(domain_size, periodicity, default_layout, default_target, parallel, default_ghost_layers)
44 if parallel:
45 if wlb is None:
---> 46 raise ValueError("Cannot create parallel data handling because walberla module is not available")
47
48 if periodicity is False or periodicity is None:
ValueError: Cannot create parallel data handling because walberla module is not available
%% Cell type:code id: tags:
``` python
# Plot only when no global named `is_test_run` is defined
# (presumably set by an automated notebook test run; confirm).
if 'is_test_run' not in globals():
    import pandas as pd
    import seaborn as sns

    # Build the table once; both panels show the same measurements.
    data = pd.DataFrame.from_dict(result)

    # Left panel: logarithmic time axis.
    plt.subplot(1, 2, 1)
    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
    plt.yscale('log')

    # Right panel: same bars on the default (linear) axis.
    plt.subplot(1, 2, 2)
    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
```
......@@ -9,6 +9,7 @@
"""
import pystencils
import pystencils.astnodes
import pystencils.config
def test_source_code_comment():
......@@ -19,7 +20,7 @@ def test_source_code_comment():
{a.center(): b[0, 2] + b[0, 0]}, {}
)
config = pystencils.CreateKernelConfig(target=pystencils.Target.CPU)
config = pystencils.config.CreateKernelConfig(target=pystencils.Target.CPU)
ast = pystencils.create_kernel(assignments, config=config)
ast.body.append(pystencils.astnodes.SourceCodeComment("Hallo"))
......