Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 841 additions and 53 deletions
File moved
import pytest
import re
import sympy as sp
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
def __init__(self):
super().__init__()
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_infinity(type, negative, target):
x = pystencils.fields(f'x: {type}[1d]')
if negative:
assignment = pystencils.Assignment(x.center, -sp.oo)
else:
assignment = pystencils.Assignment(x.center, sp.oo)
ast = pystencils.create_kernel(assignment, data_type=type, target=target)
if target == pystencils.Target.GPU:
pytest.importorskip('cupy')
ast.compile()
print(ast.compile().code)
def test_print_unsupported_node():
with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
CBackend()(UnsupportedNode())
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_subtraction(dtype, target):
a, b, c = sp.symbols("a b c")
x = pystencils.fields(f'x: {dtype}[3d]')
y = pystencils.fields(f'y: {dtype}[3d]')
config = pystencils.CreateKernelConfig(target=target, data_type=dtype)
update = pystencils.Assignment(x.center, y.center - a * b ** 8 + b * -1 / 42.0 - 2 * c ** 4)
ast = pystencils.create_kernel(update, config=config)
code = pystencils.get_code_str(ast)
assert "-1.0" not in code
def test_print_small_integer_pow():
printer = pystencils.backends.cbackend.CBackend()
x = sp.Symbol("x")
y = sp.Symbol("y")
n = pystencils.TypedSymbol("n", "int")
t = pystencils.TypedSymbol("t", "float32")
s = pystencils.TypedSymbol("s", "float32")
equs = [
pystencils.astnodes.SympyAssignment(y, 1/x),
pystencils.astnodes.SympyAssignment(y, x*x),
pystencils.astnodes.SympyAssignment(y, 1/(x*x)),
pystencils.astnodes.SympyAssignment(y, x**8),
pystencils.astnodes.SympyAssignment(y, x**(-8)),
pystencils.astnodes.SympyAssignment(y, x**9),
pystencils.astnodes.SympyAssignment(y, x**(-9)),
pystencils.astnodes.SympyAssignment(y, x**n),
pystencils.astnodes.SympyAssignment(y, sp.Pow(4, 4, evaluate=False)),
pystencils.astnodes.SympyAssignment(y, x**0.25),
pystencils.astnodes.SympyAssignment(y, x**y),
pystencils.astnodes.SympyAssignment(y, pystencils.typing.cast_functions.CastFunc(1/x, "float32")),
pystencils.astnodes.SympyAssignment(y, pystencils.typing.cast_functions.CastFunc(x*x, "float32")),
pystencils.astnodes.SympyAssignment(y, (t+s)**(-8)),
pystencils.astnodes.SympyAssignment(y, (t+s)**(-9)),
]
typed = pystencils.typing.transformations.add_types(equs, pystencils.CreateKernelConfig())
regexes = [
r"1\.0\s*/\s*\(?\s*x\s*\)?",
r"x\s*\*\s*x",
r"1\.0\s*/\s*\(\s*x\s*\*x\s*\)",
r"x(\s*\*\s*x){7}",
r"1\.0\s*/\s*\(\s*x(\s*\*\s*x){7}\s*\)",
r"pow\(\s*x\s*,\s*9(\.0)?\s*\)",
r"pow\(\s*x\s*,\s*-9(\.0)?\s*\)",
r"pow\(\s*x\s*,\s*\(?\s*\(\s*double\s*\)\s*\(\s*n\s*\)\s*\)?\s*\)",
r"\(\s*int[a-zA-Z0-9_]*\s*\)\s*\(+\s*4(\s*\*\s*4){3}\s*\)+",
r"pow\(\s*x\s*,\s*0\.25\s*\)",
r"pow\(\s*x\s*,\s*y\s*\)",
r"\(\s*float\s*\)[ ()]*1\.0\s*/\s*\(?\s*x\s*\)?",
r"\(\s*float\s*\)[ ()]*x\s*\*\s*x",
r"\(\s*float\s*\)\s*\(\s*1\.0f\s*/\s*\(\s*\(\s*s\s*\+\s*t\s*\)(\s*\*\s*\(\s*s\s*\+\s*t\s*\)){7}\s*\)",
r"powf\(\s*s\s*\+\s*t\s*,\s*-9\.0f\s*\)",
]
for r, e in zip(regexes, typed):
assert re.search(r, printer(e))
import numpy as np
import pystencils as ps
from pystencils.cpu.vectorization import get_supported_instruction_sets
from pystencils.cpu.vectorization import replace_inner_stride_with_one, vectorize
def test_basic_kernel():
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = ps.create_data_handling(domain_size=domain_shape, periodicity=True)
assert all(dh.periodicity)
f = dh.add_array('f', values_per_cell=1)
tmp = dh.add_array('tmp', values_per_cell=1)
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
stencil = stencil_2d if dh.dim == 2 else stencil_3d
jacobi = ps.Assignment(tmp.center, sum(f.neighbors(stencil)) / len(stencil))
kernel = ps.create_kernel(jacobi).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
for b in dh.iterate(ghost_layers=0):
np.testing.assert_equal(b['f'], 42)
float_seq = [1.0, 2.0, 3.0, 4.0]
int_seq = [1, 2, 3]
for op in ('min', 'max', 'sum'):
assert (dh.reduce_float_sequence(float_seq, op) == float_seq).all()
assert (dh.reduce_int_sequence(int_seq, op) == int_seq).all()
def test_basic_blocking_staggered():
f = ps.fields("f: double[2D]")
stag = ps.fields("stag(2): double[2D]", field_type=ps.FieldType.STAGGERED)
terms = [
f[0, 0] - f[-1, 0],
f[0, 0] - f[0, -1],
]
assignments = [ps.Assignment(stag.staggered_access(d), terms[i]) for i, d in enumerate(stag.staggered_stencil)]
kernel = ps.create_staggered_kernel(assignments, cpu_blocking=(3, 16)).compile()
reference_kernel = ps.create_staggered_kernel(assignments).compile()
f_arr = np.random.rand(80, 33)
stag_arr = np.zeros((80, 33, 3))
stag_ref = np.zeros((80, 33, 3))
kernel(f=f_arr, stag=stag_arr)
reference_kernel(f=f_arr, stag=stag_ref)
np.testing.assert_almost_equal(stag_arr, stag_ref)
def test_basic_vectorization():
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
instruction_set = supported_instruction_sets[-1]
else:
instruction_set = None
f, g = ps.fields("f, g : double[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
ast = ps.create_kernel(update_rule)
replace_inner_stride_with_one(ast)
vectorize(ast, instruction_set=instruction_set)
func = ast.compile()
arr = np.ones((23 + 2, 17 + 2)) * 5.0
dst = np.zeros_like(arr)
func(g=dst, f=arr)
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
\ No newline at end of file
...@@ -3,10 +3,13 @@ import numpy as np ...@@ -3,10 +3,13 @@ import numpy as np
import pytest import pytest
import pystencils as ps import pystencils as ps
from pystencils.astnodes import SympyAssignment
from pystencils.node_collection import NodeCollection
from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles, random_symbol from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles, random_symbol
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.cpujit import get_compiler_config from pystencils.cpu.cpujit import get_compiler_config
from pystencils.data_types import TypedSymbol from pystencils.typing import TypedSymbol
from pystencils.enums import Target
RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles, RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles,
('aesni', 'float'): AESNIFourFloats, ('aesni', 'double'): AESNITwoDoubles} ('aesni', 'float'): AESNIFourFloats, ('aesni', 'double'): AESNITwoDoubles}
...@@ -19,15 +22,17 @@ if get_compiler_config()['os'] == 'windows': ...@@ -19,15 +22,17 @@ if get_compiler_config()['os'] == 'windows':
instruction_sets.remove('avx') instruction_sets.remove('avx')
if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower(): if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512') instruction_sets.remove('avx512')
if 'avx512vl' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
instruction_sets.remove('avx512vl')
@pytest.mark.parametrize('target,rng', (('cpu', 'philox'), ('cpu', 'aesni'), ('gpu', 'philox'))) @pytest.mark.parametrize('target, rng', ((Target.CPU, 'philox'), (Target.CPU, 'aesni'), (Target.GPU, 'philox')))
@pytest.mark.parametrize('precision', ('float', 'double')) @pytest.mark.parametrize('precision', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float', 'double')) @pytest.mark.parametrize('dtype', ('float', 'double'))
def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None): def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
if target == 'gpu': if target == Target.GPU:
pytest.importorskip('pycuda') pytest.importorskip('cupy')
if instruction_sets and set(['neon', 'vsx']).intersection(instruction_sets) and rng == 'aesni': if instruction_sets and {'neon', 'sve', 'sve2', 'sme', 'vsx', 'rvv'}.intersection(instruction_sets) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture') pytest.xfail('AES not yet implemented for this architecture')
if rng == 'aesni' and len(keys) == 2: if rng == 'aesni' and len(keys) == 2:
keys *= 2 keys *= 2
...@@ -40,7 +45,7 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), ...@@ -40,7 +45,7 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0),
dh.fill(f.name, 42.0) dh.fill(f.name, 42.0)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets, keys=keys) rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets, keys=keys)
assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)] assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile() kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu() dh.all_to_gpu()
...@@ -74,9 +79,8 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), ...@@ -74,9 +79,8 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0),
int_reference = np.empty(dh.shape + (4,), dtype=int) int_reference = np.empty(dh.shape + (4,), dtype=int)
for x in range(dh.shape[0]): for x in range(dh.shape[0]):
for y in range(dh.shape[1]): for y in range(dh.shape[1]):
r = Philox(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64, r = Philox(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64 - 1,
key=keys[0] + keys[1] * 2 ** 32, number=4, width=32, mode="sequence") key=keys[0] + keys[1] * 2 ** 32, number=4, width=32, mode="sequence")
r.advance(-4, counter=False)
int_reference[x, y, :] = r.random_raw(size=4) int_reference[x, y, :] = r.random_raw(size=4)
if precision == 'float' or dtype == 'float': if precision == 'float' or dtype == 'float':
...@@ -106,11 +110,11 @@ def test_rng_offsets(kind, vectorized): ...@@ -106,11 +110,11 @@ def test_rng_offsets(kind, vectorized):
else: else:
test = test_rng test = test_rng
if kind == 'value': if kind == 'value':
test(instruction_sets[0] if vectorized else 'cpu', 'philox', 'float', 'float', t=8, test(instruction_sets[-1] if vectorized else Target.CPU, 'philox', 'float', 'float', t=8,
offsets=(6, 7), keys=(5, 309)) offsets=(6, 7), keys=(5, 309))
elif kind == 'symbol': elif kind == 'symbol':
offsets = (TypedSymbol("x0", np.uint32), TypedSymbol("y0", np.uint32)) offsets = (TypedSymbol("x0", np.uint32), TypedSymbol("y0", np.uint32))
test(instruction_sets[0] if vectorized else 'cpu', 'philox', 'float', 'float', t=8, test(instruction_sets[-1] if vectorized else Target.GPU, 'philox', 'float', 'float', t=8,
offsets=offsets, offset_values=(6, 7), keys=(5, 309)) offsets=offsets, offset_values=(6, 7), keys=(5, 309))
...@@ -118,18 +122,18 @@ def test_rng_offsets(kind, vectorized): ...@@ -118,18 +122,18 @@ def test_rng_offsets(kind, vectorized):
@pytest.mark.parametrize('rng', ('philox', 'aesni')) @pytest.mark.parametrize('rng', ('philox', 'aesni'))
@pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double'))) @pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None): def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
if target in ['neon', 'vsx'] and rng == 'aesni': if (target in ['neon', 'vsx', 'rvv', 'sme'] or target.startswith('sve')) and rng == 'aesni':
pytest.xfail('AES not yet implemented for this architecture') pytest.xfail('AES not yet implemented for this architecture')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target} cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}
dh = ps.create_data_handling((17, 17), default_ghost_layers=0, default_target='cpu') dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2, f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
dtype=np.float32 if dtype == 'float' else np.float64, alignment=True) dtype=np.float32 if dtype == 'float' else np.float64, alignment=True)
dh.fill(f.name, 42.0) dh.fill(f.name, 42.0)
ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2) ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets) rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [ps.Assignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)] assignments = [rng_node] + [SympyAssignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile() kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
kwargs = {'time_step': t} kwargs = {'time_step': t}
...@@ -138,7 +142,7 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke ...@@ -138,7 +142,7 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke
dh.run_kernel(kernel, **kwargs) dh.run_kernel(kernel, **kwargs)
rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets) rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)] assignments = [rng_node] + [SympyAssignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile() kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
dh.run_kernel(kernel, **kwargs) dh.run_kernel(kernel, **kwargs)
...@@ -152,31 +156,32 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke ...@@ -152,31 +156,32 @@ def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), ke
@pytest.mark.parametrize('vectorized', (False, True)) @pytest.mark.parametrize('vectorized', (False, True))
def test_rng_symbol(vectorized): def test_rng_symbol(vectorized):
"""Make sure that the RNG symbol generator generates symbols and that the resulting code compiles""" """Make sure that the RNG symbol generator generates symbols and that the resulting code compiles"""
cpu_vectorize_info = None
if vectorized: if vectorized:
if not instruction_sets: if not instruction_sets:
pytest.skip("cannot detect CPU instruction set") pytest.skip("cannot detect CPU instruction set")
else: else:
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True,
'instruction_set': instruction_sets[0]} 'instruction_set': instruction_sets[-1]}
else:
cpu_vectorize_info = None dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target="cpu")
f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True) f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
ac = ps.AssignmentCollection([ps.Assignment(f(i), 0) for i in range(f.shape[-1])]) nc = NodeCollection([SympyAssignment(f(i), 0) for i in range(f.shape[-1])])
rng_symbol_gen = random_symbol(ac.subexpressions, dim=dh.dim) subexpressions = []
rng_symbol_gen = random_symbol(subexpressions, dim=dh.dim)
for i in range(f.shape[-1]): for i in range(f.shape[-1]):
ac.main_assignments[i] = ps.Assignment(ac.main_assignments[i].lhs, next(rng_symbol_gen)) nc.all_assignments[i] = SympyAssignment(nc.all_assignments[i].lhs, next(rng_symbol_gen))
symbols = [a.rhs for a in ac.main_assignments] symbols = [a.rhs for a in nc.all_assignments]
[nc.all_assignments.insert(0, subexpression) for subexpression in subexpressions]
assert len(symbols) == f.shape[-1] and len(set(symbols)) == f.shape[-1] assert len(symbols) == f.shape[-1] and len(set(symbols)) == f.shape[-1]
ps.create_kernel(ac, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile() ps.create_kernel(nc, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
@pytest.mark.parametrize('vectorized', (False, True)) @pytest.mark.parametrize('vectorized', (False, True))
def test_staggered(vectorized): def test_staggered(vectorized):
"""Make sure that the RNG counter can be substituted during loop cutting""" """Make sure that the RNG counter can be substituted during loop cutting"""
dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target="cpu") dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX) j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)
a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil]) a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil])
rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles) rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
...@@ -189,16 +194,16 @@ def test_staggered(vectorized): ...@@ -189,16 +194,16 @@ def test_staggered(vectorized):
pytest.skip("cannot detect CPU instruction set") pytest.skip("cannot detect CPU instruction set")
pytest.importorskip('islpy') pytest.importorskip('islpy')
cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': False, cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': False,
'instruction_set': instruction_sets[0]} 'instruction_set': instruction_sets[-1]}
dh.fill(j.name, 867) dh.fill(j.name, 867)
dh.run_kernel(kernel, seed=5, time_step=309) dh.run_kernel(kernel, seed=5, time_step=309)
ref_data = dh.gather_array(j.name) ref_data = dh.gather_array(j.name)
kernel2 = ps.create_staggered_kernel(a, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile() kernel2 = ps.create_staggered_kernel(a, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
dh.fill(j.name, 867) dh.fill(j.name, 867)
dh.run_kernel(kernel2, seed=5, time_step=309) dh.run_kernel(kernel2, seed=5, time_step=309)
data = dh.gather_array(j.name) data = dh.gather_array(j.name)
assert np.allclose(ref_data, data) assert np.allclose(ref_data, data)
from pystencils.cache import sharedmethodcache
class Fib:
def __init__(self):
self.fib_rec_called = 0
self.fib_iter_called = 0
@sharedmethodcache("fib_cache")
def fib_rec(self, n):
self.fib_rec_called += 1
return 1 if n <= 1 else self.fib_rec(n-1) + self.fib_rec(n-2)
@sharedmethodcache("fib_cache")
def fib_iter(self, n):
self.fib_iter_called += 1
f1, f2 = 0, 1
for i in range(n):
f2 = f1 + f2
f1 = f2 - f1
return f2
def test_fib_memoization_1():
fib = Fib()
assert "fib_cache" not in fib.__dict__
f13 = fib.fib_rec(13)
assert fib.fib_rec_called == 14
assert "fib_cache" in fib.__dict__
assert fib.fib_cache[(13,)] == f13
for k in range(14):
# fib_iter should use cached results from fib_rec
fib.fib_iter(k)
assert fib.fib_iter_called == 0
def test_fib_memoization_2():
fib = Fib()
f11 = fib.fib_iter(11)
f12 = fib.fib_iter(12)
assert fib.fib_iter_called == 2
f13 = fib.fib_rec(13)
# recursive calls should be cached
assert fib.fib_rec_called == 1
class Triad:
def __init__(self):
self.triad_called = 0
@sharedmethodcache("triad_cache")
def triad(self, a, b, c=0):
"""Computes the triad a*b+c."""
self.triad_called += 1
return a * b + c
def test_triad_memoization():
triad = Triad()
assert triad.triad.__doc__ == "Computes the triad a*b+c."
t = triad.triad(12, 4, 15)
assert triad.triad_called == 1
assert triad.triad_cache[(12, 4, 15)] == t
t = triad.triad(12, 4, c=15)
assert triad.triad_called == 2
assert triad.triad_cache[(12, 4, 'c', 15)] == t
t = triad.triad(12, 4, 15)
assert triad.triad_called == 2
t = triad.triad(12, 4, c=15)
assert triad.triad_called == 2
...@@ -71,9 +71,10 @@ def test_split_inner_loop(): ...@@ -71,9 +71,10 @@ def test_split_inner_loop():
ast = ps.create_kernel(ac) ast = ps.create_kernel(ac)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
ps.show_code(ast)
# we have four inner loops as indicated in split groups (4 elements) plus one outer loop # we have four inner loops as indicated in split groups (4 elements) plus one outer loop
assert code.count('for') == 5 assert code.count('for') == 5
ast = ps.create_kernel(ac, target='gpu') ast = ps.create_kernel(ac, target=ps.Target.GPU)
code = ps.get_code_str(ast) code = ps.get_code_str(ast)
# on GPUs is wouldn't be good to use loop splitting # on GPUs is wouldn't be good to use loop splitting
......
from sys import version_info as vs
import pytest import pytest
import pystencils.config
import sympy as sp import sympy as sp
import pystencils as ps
from pystencils import Assignment, AssignmentCollection, fields
from pystencils.simp import subexpression_substitution_in_main_assignments from pystencils.simp import subexpression_substitution_in_main_assignments
from pystencils.simp import add_subexpressions_for_divisions from pystencils.simp import add_subexpressions_for_divisions
from pystencils.simp import add_subexpressions_for_sums from pystencils.simp import add_subexpressions_for_sums
from pystencils.simp import add_subexpressions_for_field_reads from pystencils.simp import add_subexpressions_for_field_reads
from pystencils.simp.simplifications import add_subexpressions_for_constants from pystencils.simp.simplifications import add_subexpressions_for_constants
from pystencils import Assignment, AssignmentCollection, fields from pystencils.typing import BasicType, TypedSymbol
a, b, c, d, x, y, z = sp.symbols("a b c d x y z") a, b, c, d, x, y, z = sp.symbols("a b c d x y z")
s0, s1, s2, s3 = sp.symbols("s_:4") s0, s1, s2, s3 = sp.symbols("s_:4")
...@@ -128,11 +133,68 @@ def test_add_subexpressions_for_sums(): ...@@ -128,11 +133,68 @@ def test_add_subexpressions_for_sums():
def test_add_subexpressions_for_field_reads(): def test_add_subexpressions_for_field_reads():
s, v = fields("s(5), v(5): double[2D]") s, v = fields("s(5), v(5): double[2D]")
subexpressions = [] subexpressions = []
main = [
Assignment(s[0, 0](0), 3 * v[0, 0](0)), main = [Assignment(s[0, 0](0), 3 * v[0, 0](0)),
Assignment(s[0, 0](1), 10 * v[0, 0](1)) Assignment(s[0, 0](1), 10 * v[0, 0](1))]
]
ac = AssignmentCollection(main, subexpressions) ac = AssignmentCollection(main, subexpressions)
assert len(ac.subexpressions) == 0 assert len(ac.subexpressions) == 0
ac = add_subexpressions_for_field_reads(ac) ac2 = add_subexpressions_for_field_reads(ac)
assert len(ac.subexpressions) == 2 assert len(ac2.subexpressions) == 2
ac3 = add_subexpressions_for_field_reads(ac, data_type="float32")
assert len(ac3.subexpressions) == 2
assert isinstance(ac3.subexpressions[0].lhs, TypedSymbol)
assert ac3.subexpressions[0].lhs.dtype == BasicType("float32")
# added check for early out of add_subexpressions_for_field_reads is no fields appear on the rhs (See #92)
main = [Assignment(s[0, 0](0), 3.0),
Assignment(s[0, 0](1), 4.0)]
ac4 = AssignmentCollection(main, subexpressions)
assert len(ac4.subexpressions) == 0
ac5 = add_subexpressions_for_field_reads(ac4)
assert ac5 is not None
assert ac4 is ac5
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_sympy_optimizations(target, dtype):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
src, dst = ps.fields(f'src, dst: {dtype}[2d]')
assignments = ps.AssignmentCollection({
src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
})
config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
ast = ps.create_kernel(assignments, config=config)
ps.show_code(ast)
code = ps.get_code_str(ast)
if dtype == 'float32':
assert 'expf(' in code
elif dtype == 'float64':
assert 'exp(' in code
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('simplification', (True, False))
@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
def test_evaluate_constant_terms(target, simplification):
if target == ps.Target.GPU:
pytest.importorskip("cupy")
src, dst = ps.fields('src, dst: float32[2d]')
# cos of a number will always be simplified
assignments = ps.AssignmentCollection({
src[0, 0]: -sp.cos(1) + dst[0, 0]
})
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast)
assert 'cos(' not in code
import numpy as np import numpy as np
import pytest import pytest
import pystencils
import sympy as sp import sympy as sp
from pystencils import Assignment, Field, create_kernel, fields from pystencils import Assignment, Field, create_kernel, fields
...@@ -104,13 +106,20 @@ def test_loop_independence_checks(): ...@@ -104,13 +106,20 @@ def test_loop_independence_checks():
Assignment(g[0, 0], f[1, 0])]) Assignment(g[0, 0], f[1, 0])])
assert 'Field g is written at two different locations' in str(e.value) assert 'Field g is written at two different locations' in str(e.value)
# This is allowed - because only one element of g is accessed # This is not allowed - because this is not SSA (it can be overwritten with allow_double_writes)
with pytest.raises(ValueError) as e:
create_kernel([Assignment(g[0, 2], f[0, 1]),
Assignment(g[0, 2], 2 * g[0, 2])])
# This is allowed - because allow_double_writes is True now
create_kernel([Assignment(g[0, 2], f[0, 1]), create_kernel([Assignment(g[0, 2], f[0, 1]),
Assignment(g[0, 2], 2 * g[0, 2])]) Assignment(g[0, 2], 2 * g[0, 2])],
config=pystencils.CreateKernelConfig(allow_double_writes=True))
create_kernel([Assignment(v[0, 2](1), f[0, 1]), with pytest.raises(ValueError) as e:
Assignment(v[0, 1](0), 4), create_kernel([Assignment(v[0, 2](1), f[0, 1]),
Assignment(v[0, 2](1), 2 * v[0, 2](1))]) Assignment(v[0, 1](0), 4),
Assignment(v[0, 2](1), 2 * v[0, 2](1))])
with pytest.raises(ValueError) as e: with pytest.raises(ValueError) as e:
create_kernel([Assignment(g[0, 1], 3), create_kernel([Assignment(g[0, 1], 3),
......
import numpy as np
import sympy as sp
import pytest
from pystencils import (
Assignment,
Field,
TypedSymbol,
create_kernel,
make_slice,
Target,
create_data_handling,
)
from pystencils.simp import sympy_cse_on_assignment_list
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_sliced_iteration(target):
if target == Target.GPU:
pytest.importorskip("cupy")
size = (4, 4)
dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
src_field = dh.add_array("src", 1)
dst_field = dh.add_array("dst", 1)
dh.fill(src_field.name, 1.0, ghost_layers=True)
dh.fill(dst_field.name, 0.0, ghost_layers=True)
a, b = sp.symbols("a b")
update_rule = Assignment(
dst_field[0, 0],
(
a * src_field[0, 1]
+ a * src_field[0, -1]
+ b * src_field[1, 0]
+ b * src_field[-1, 0]
)
/ 4,
)
s = make_slice[1:3, 1]
kernel = create_kernel(
sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
).compile()
if target == Target.GPU:
dh.all_to_gpu()
dh.run_kernel(kernel, a=1.0, b=1.0)
if target == Target.GPU:
dh.all_to_cpu()
expected_result = np.zeros(size)
expected_result[1:3, 1] = 1
np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
def test_symbols_in_slice(target):
if target == Target.GPU:
pytest.xfail("Iteration slices including arbitrary symbols are currently broken on GPU")
size = (4, 4)
dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
src_field = dh.add_array("src", 1)
dst_field = dh.add_array("dst", 1)
dh.fill(src_field.name, 1.0, ghost_layers=True)
dh.fill(dst_field.name, 0.0, ghost_layers=True)
a, b = sp.symbols("a b")
update_rule = Assignment(
dst_field[0, 0],
(
a * src_field[0, 1]
+ a * src_field[0, -1]
+ b * src_field[1, 0]
+ b * src_field[-1, 0]
)
/ 4,
)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(
sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
).compile()
if target == Target.GPU:
dh.all_to_gpu()
dh.run_kernel(kernel, a=1.0, b=1.0, x_end=x_end_value)
if target == Target.GPU:
dh.all_to_cpu()
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('waLBerla')
```
%% Output
<module 'waLBerla' from '/Users/holzer/walberla/python/waLBerla/__init__.py'>
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from time import perf_counter
from statistics import median
from functools import partial
```
%% Cell type:markdown id: tags:
## Benchmark for Python call overhead
%% Cell type:code id: tags:
``` python
inner_repeats = 100
outer_repeats = 5
sizes = [2**i for i in range(1, 8)]
sizes
```
%% Output
$\displaystyle \left[ 2, \ 4, \ 8, \ 16, \ 32, \ 64, \ 128\right]$
[2, 4, 8, 16, 32, 64, 128]
%% Cell type:code id: tags:
``` python
def benchmark_pure(domain_size, extract_first=False):
src = np.zeros(domain_size)
dst = np.zeros_like(src)
f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
if extract_first:
kernel = kernel.kernel
start = perf_counter()
for i in range(inner_repeats):
kernel(src=src, dst=dst)
src, dst = dst, src
end = perf_counter()
else:
start = perf_counter()
for i in range(inner_repeats):
kernel(src=src, dst=dst)
src, dst = dst, src
end = perf_counter()
return (end - start) / inner_repeats
def benchmark_datahandling(domain_size, parallel=False):
dh = ps.create_data_handling(domain_size, parallel=parallel)
f_src = dh.add_array('src')
f_dst = dh.add_array('dst')
kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
start = perf_counter()
for i in range(inner_repeats):
dh.run_kernel(kernel)
dh.swap('src', 'dst')
end = perf_counter()
return (end - start) / inner_repeats
name_to_func = {
'pure_extract': partial(benchmark_pure, extract_first=True),
'pure_no_extract': partial(benchmark_pure, extract_first=False),
'dh_serial': partial(benchmark_datahandling, parallel=False),
'dh_parallel': partial(benchmark_datahandling, parallel=True),
}
```
%% Cell type:code id: tags:
``` python
result = {'block_size': [],
'name': [],
'time': []}
for bs in sizes:
print("Computing size ", bs)
for name, func in name_to_func.items():
for i in range(outer_repeats):
time = func((bs, bs))
result['block_size'].append(bs)
result['name'].append(name)
result['time'].append(time)
```
%% Output
Computing size 2
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/07/0d7kq8fd0sx24cs53zz90_qc0000gp/T/ipykernel_12649/2009975470.py in <module>
7 for name, func in name_to_func.items():
8 for i in range(outer_repeats):
----> 9 time = func((bs, bs))
10 result['block_size'].append(bs)
11 result['name'].append(name)
/var/folders/07/0d7kq8fd0sx24cs53zz90_qc0000gp/T/ipykernel_12649/3509370390.py in benchmark_datahandling(domain_size, parallel)
20
21 def benchmark_datahandling(domain_size, parallel=False):
---> 22 dh = ps.create_data_handling(domain_size, parallel=parallel)
23 f_src = dh.add_array('src')
24 f_dst = dh.add_array('dst')
~/pystencils/pystencils/pystencils/datahandling/__init__.py in create_data_handling(domain_size, periodicity, default_layout, default_target, parallel, default_ghost_layers)
44 if parallel:
45 if wlb is None:
---> 46 raise ValueError("Cannot create parallel data handling because walberla module is not available")
47
48 if periodicity is False or periodicity is None:
ValueError: Cannot create parallel data handling because walberla module is not available
%% Cell type:code id: tags:
``` python
if 'is_test_run' not in globals():
import pandas as pd
import seaborn as sns
data = pd.DataFrame.from_dict(result)
plt.subplot(1,2,1)
sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
plt.yscale('log')
plt.subplot(1,2,2)
data = pd.DataFrame.from_dict(result)
sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
```
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
""" """
import pystencils import pystencils
import pystencils.astnodes import pystencils.astnodes
import pystencils.config
def test_source_code_comment(): def test_source_code_comment():
...@@ -19,7 +20,8 @@ def test_source_code_comment(): ...@@ -19,7 +20,8 @@ def test_source_code_comment():
{a.center(): b[0, 2] + b[0, 0]}, {} {a.center(): b[0, 2] + b[0, 0]}, {}
) )
ast = pystencils.create_kernel(assignments, target='cpu') config = pystencils.config.CreateKernelConfig(target=pystencils.Target.CPU)
ast = pystencils.create_kernel(assignments, config=config)
ast.body.append(pystencils.astnodes.SourceCodeComment("Hallo")) ast.body.append(pystencils.astnodes.SourceCodeComment("Hallo"))
ast.body.append(pystencils.astnodes.EmptyLine()) ast.body.append(pystencils.astnodes.EmptyLine())
......
...@@ -5,10 +5,11 @@ import pytest ...@@ -5,10 +5,11 @@ import pytest
import pystencils as ps import pystencils as ps
from pystencils import x_staggered_vector, TypedSymbol from pystencils import x_staggered_vector, TypedSymbol
from pystencils.enums import Target
class TestStaggeredDiffusion: class TestStaggeredDiffusion:
def _run(self, num_neighbors, target='cpu', openmp=False): def _run(self, num_neighbors, target=ps.Target.CPU, openmp=False):
L = (40, 40) L = (40, 40)
D = 0.066 D = 0.066
dt = 1 dt = 1
...@@ -71,18 +72,12 @@ class TestStaggeredDiffusion: ...@@ -71,18 +72,12 @@ class TestStaggeredDiffusion:
def test_diffusion_4(self): def test_diffusion_4(self):
self._run(4) self._run(4)
def test_diffusion_opencl(self):
import pytest
pytest.importorskip('pyopencl')
import pystencils.opencl.autoinit
self._run(4, 'opencl')
def test_diffusion_openmp(self): def test_diffusion_openmp(self):
self._run(4, openmp=True) self._run(4, openmp=True)
def test_staggered_subexpressions(): def test_staggered_subexpressions():
dh = ps.create_data_handling((10, 10), periodicity=True, default_target='cpu') dh = ps.create_data_handling((10, 10), periodicity=True, default_target=Target.CPU)
j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED) j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
c = sp.symbols("c") c = sp.symbols("c")
assignments = [ps.Assignment(j.staggered_access("W"), c), assignments = [ps.Assignment(j.staggered_access("W"), c),
...@@ -92,7 +87,7 @@ def test_staggered_subexpressions(): ...@@ -92,7 +87,7 @@ def test_staggered_subexpressions():
def test_staggered_loop_cutting(): def test_staggered_loop_cutting():
pytest.importorskip('islpy') pytest.importorskip('islpy')
dh = ps.create_data_handling((4, 4), periodicity=True, default_target='cpu') dh = ps.create_data_handling((4, 4), periodicity=True, default_target=Target.CPU)
j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED) j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)
assignments = [ps.Assignment(j.staggered_access("SW"), 1)] assignments = [ps.Assignment(j.staggered_access("SW"), 1)]
ast = ps.create_staggered_kernel(assignments, target=dh.default_target) ast = ps.create_staggered_kernel(assignments, target=dh.default_target)
......
from pystencils import fields, Assignment, AssignmentCollection
from pystencils.simp.subexpression_insertion import *
def test_subexpression_insertion():
f, g = fields('f(10), g(10) : [2D]')
xi = sp.symbols('xi_:10')
xi_set = set(xi)
subexpressions = [
Assignment(xi[0], -f(4)),
Assignment(xi[1], -(f(1) * f(2))),
Assignment(xi[2], 2.31 * f(5)),
Assignment(xi[3], 1.8 + f(5) + f(6)),
Assignment(xi[4], 5.7 + f(6)),
Assignment(xi[5], (f(4) + f(5))**2),
Assignment(xi[6], f(3)**2),
Assignment(xi[7], f(4)),
Assignment(xi[8], 13),
Assignment(xi[9], 0),
]
assignments = [Assignment(g(i), x) for i, x in enumerate(xi)]
ac = AssignmentCollection(assignments, subexpressions=subexpressions)
ac_ins = insert_symbol_times_minus_one(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[0]})
ac_ins = insert_constant_multiples(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[0], xi[2]})
ac_ins = insert_constant_additions(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[4]})
ac_ins = insert_squares(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[6]})
ac_ins = insert_aliases(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[7]})
ac_ins = insert_zeros(ac)
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[9]})
ac_ins = insert_constants(ac, skip={xi[9]})
assert (ac_ins.bound_symbols & xi_set) == (xi_set - {xi[8]})
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import pytest
import numpy as np
import pystencils.config
import sympy as sp
import sympy.abc
import pystencils as ps
from pystencils.typing import create_type
@pytest.mark.parametrize('dtype', ["float64", "float32"])
def test_sum(dtype):
sum = sp.Sum(sp.abc.k, (sp.abc.k, 1, 100))
expanded_sum = sum.doit()
# print(sum)
# print(expanded_sum)
x = ps.fields(f'x: {dtype}[1d]')
assignments = ps.AssignmentCollection({x.center(): sum})
ast = ps.create_kernel(assignments)
code = ps.get_code_str(ast)
kernel = ast.compile()
# ps.show_code(ast)
if dtype == "float32":
assert "5050.0f;" in code
array = np.zeros((10,), dtype=dtype)
kernel(x=array)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
@pytest.mark.parametrize('dtype', ["int32", "int64", "float64", "float32"])
def test_product(dtype):
k = ps.TypedSymbol('k', create_type(dtype))
sum = sympy.Product(k, (k, 1, 10))
expanded_sum = sum.doit()
# print(sum)
# print(expanded_sum)
x = ps.fields(f'x: {dtype}[1d]')
assignments = ps.AssignmentCollection({x.center(): sum})
config = pystencils.config.CreateKernelConfig()
ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast)
kernel = ast.compile()
# print(code)
if dtype == "int64" or dtype == "int32":
assert '3628800;' in code
elif dtype == "float32":
assert '3628800.0f;' in code
else:
assert '3628800.0;' in code
array = np.zeros((10,), dtype=dtype)
kernel(x=array)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
# TODO: See Issue !55
# def test_prod_var_limit():
#
# k = ps.TypedSymbol('k', create_type('int64'))
# limit = ps.TypedSymbol('limit', create_type('int64'))
#
# sum = sympy.Sum(k, (k, 1, limit))
# expanded_sum = sum.replace(limit, 100).doit()
#
# print(sum)
# print(expanded_sum)
#
# x = ps.fields('x: int64[1d]')
#
# assignments = ps.AssignmentCollection({x.center(): sum})
#
# ast = ps.create_kernel(assignments)
# ps.show_code(ast)
# kernel = ast.compile()
#
# array = np.zeros((10,), np.int64)
#
# kernel(x=array, limit=100)
#
# assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
import sympy import sympy
import numpy as np import numpy as np
import sympy as sp
import pystencils import pystencils
from pystencils.sympyextensions import replace_second_order_products from pystencils.sympyextensions import replace_second_order_products
from pystencils.sympyextensions import remove_higher_order_terms from pystencils.sympyextensions import remove_higher_order_terms
from pystencils.sympyextensions import complete_the_squares_in_exp from pystencils.sympyextensions import complete_the_squares_in_exp
from pystencils.sympyextensions import extract_most_common_factor from pystencils.sympyextensions import extract_most_common_factor
from pystencils.sympyextensions import simplify_by_equality
from pystencils.sympyextensions import count_operations from pystencils.sympyextensions import count_operations
from pystencils.sympyextensions import common_denominator from pystencils.sympyextensions import common_denominator
from pystencils.sympyextensions import get_symmetric_part from pystencils.sympyextensions import get_symmetric_part
...@@ -13,6 +15,7 @@ from pystencils.sympyextensions import scalar_product ...@@ -13,6 +15,7 @@ from pystencils.sympyextensions import scalar_product
from pystencils.sympyextensions import kronecker_delta from pystencils.sympyextensions import kronecker_delta
from pystencils import Assignment from pystencils import Assignment
from pystencils.functions import DivFunc
from pystencils.fast_approximation import (fast_division, fast_inv_sqrt, fast_sqrt, from pystencils.fast_approximation import (fast_division, fast_inv_sqrt, fast_sqrt,
insert_fast_divisions, insert_fast_sqrts) insert_fast_divisions, insert_fast_sqrts)
...@@ -161,6 +164,30 @@ def test_count_operations(): ...@@ -161,6 +164,30 @@ def test_count_operations():
assert ops['divs'] == 1 assert ops['divs'] == 1
assert ops['sqrts'] == 1 assert ops['sqrts'] == 1
expr = DivFunc(x, y)
ops = count_operations(expr, only_type=None)
assert ops['divs'] == 1
expr = DivFunc(x + z, y + z)
ops = count_operations(expr, only_type=None)
assert ops['adds'] == 2
assert ops['divs'] == 1
expr = sp.UnevaluatedExpr(sp.Mul(*[x]*100, evaluate=False))
ops = count_operations(expr, only_type=None)
assert ops['muls'] == 99
expr = DivFunc(1, sp.UnevaluatedExpr(sp.Mul(*[x]*100, evaluate=False)))
ops = count_operations(expr, only_type=None)
assert ops['divs'] == 1
assert ops['muls'] == 99
expr = DivFunc(y + z, sp.UnevaluatedExpr(sp.Mul(*[x]*100, evaluate=False)))
ops = count_operations(expr, only_type=None)
assert ops['adds'] == 1
assert ops['divs'] == 1
assert ops['muls'] == 99
def test_common_denominator(): def test_common_denominator():
x = sympy.symbols('x') x = sympy.symbols('x')
...@@ -176,3 +203,26 @@ def test_get_symmetric_part(): ...@@ -176,3 +203,26 @@ def test_get_symmetric_part():
sym_part = get_symmetric_part(expr, sympy.symbols(f'y z')) sym_part = get_symmetric_part(expr, sympy.symbols(f'y z'))
assert sym_part == expected_result assert sym_part == expected_result
def test_simplify_by_equality():
x, y, z = sp.symbols('x, y, z')
p, q = sp.symbols('p, q')
# Let x = y + z
expr = x * p - y * p + z * q
expr = simplify_by_equality(expr, x, y, z)
assert expr == z * p + z * q
expr = x * (p - 2 * q) + 2 * q * z
expr = simplify_by_equality(expr, x, y, z)
assert expr == x * p - 2 * q * y
expr = x * (y + z) - y * z
expr = simplify_by_equality(expr, x, y, z)
assert expr == x*y + z**2
# Let x = y + 2
expr = x * p - 2 * p
expr = simplify_by_equality(expr, x, y, 2)
assert expr == y * p
...@@ -59,4 +59,6 @@ def test_timeloop(): ...@@ -59,4 +59,6 @@ def test_timeloop():
timeloop.run_time_span(seconds=seconds) timeloop.run_time_span(seconds=seconds)
end = time.perf_counter() end = time.perf_counter()
np.testing.assert_almost_equal(seconds, end - start, decimal=2) # This test case fails often due to time measurements. It is not a good idea to assert here
# np.testing.assert_almost_equal(seconds, end - start, decimal=2)
print("timeloop: ", seconds, " own meassurement: ", end - start)