Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 80 additions and 2040 deletions
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import itertools
from os.path import dirname, join
import numpy as np
import pytest
import sympy
import pystencils
from pystencils.interpolation_astnodes import LinearInterpolator
from pystencils.spatial_coordinates import x_, y_
type_map = {
np.float32: 'float32',
np.float64: 'float64',
np.int32: 'int32',
}
try:
import pyconrad.autoinit
except Exception:
import unittest.mock
pyconrad = unittest.mock.MagicMock()
LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
try:
import skimage.io
lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float64)
pyconrad.imshow(lenna)
except Exception:
lenna = np.random.rand(20, 30)
def test_interpolation():
x_f, y_f = pystencils.fields('x,y: float64 [2d]')
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f).at([x_ + 2.7, y_ + 7.2])
})
print(assignments)
ast = pystencils.create_kernel(assignments)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
pyconrad.imshow(lenna)
out = np.zeros_like(lenna)
kernel(x=lenna, y=out)
pyconrad.imshow(out, "out")
def test_scale_interpolation():
x_f, y_f = pystencils.fields('x,y: float64 [2d]')
for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at([0.5 * x_ + 2.7, 0.25 * y_ + 7.2])
})
print(assignments)
ast = pystencils.create_kernel(assignments)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = np.zeros_like(lenna)
kernel(x=lenna, y=out)
pyconrad.imshow(out, "out " + address_mode)
@pytest.mark.parametrize('address_mode',
['border',
'clamp',
pytest.param('warp', marks=pytest.mark.xfail(
reason="requires interpolation-refactoring branch")),
pytest.param('mirror', marks=pytest.mark.xfail(
reason="requires interpolation-refactoring branch")),
])
def test_rotate_interpolation(address_mode):
"""
'wrap', 'mirror' currently fails on new sympy due to conjugate()
"""
x_f, y_f = pystencils.fields('x,y: float64 [2d]')
rotation_angle = sympy.pi / 5
transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
})
print(assignments)
ast = pystencils.create_kernel(assignments)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = np.zeros_like(lenna)
kernel(x=lenna, y=out)
pyconrad.imshow(out, "out " + address_mode)
@pytest.mark.parametrize('dtype', (np.int32, np.float32, np.float64))
@pytest.mark.parametrize('address_mode', ('border', 'wrap', 'clamp', 'mirror'))
@pytest.mark.parametrize('use_textures', ('use_textures', False))
def test_rotate_interpolation_gpu(dtype, address_mode, use_textures):
pytest.importorskip('pycuda')
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # noqa
rotation_angle = sympy.pi / 5
scale = 1
if dtype == np.int32:
lenna_gpu = gpuarray.to_gpu(
np.ascontiguousarray(lenna * 255, dtype))
else:
lenna_gpu = gpuarray.to_gpu(
np.ascontiguousarray(lenna, dtype))
x_f, y_f = pystencils.fields(f'x,y: {type_map[dtype]} [2d]', ghost_layers=0)
transformed = scale * \
sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = gpuarray.zeros_like(lenna_gpu)
kernel(x=lenna_gpu, y=out)
pyconrad.imshow(out,
f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
@pytest.mark.parametrize('address_mode', ['border', 'wrap',
pytest.param('warp', marks=pytest.mark.xfail(
reason="% printed as fmod on old sympy")),
pytest.param('mirror', marks=pytest.mark.xfail(
reason="% printed as fmod on old sympy")),
])
@pytest.mark.parametrize('dtype', [np.float64, np.float32, np.int32])
@pytest.mark.parametrize('use_textures', ('use_textures', False,))
def test_shift_interpolation_gpu(address_mode, dtype, use_textures):
sver = sympy.__version__.split(".")
if (int(sver[0]) == 1 and int(sver[1]) < 2) and address_mode in ['mirror', 'warp']:
pytest.skip()
pytest.importorskip('pycuda')
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # noqa
rotation_angle = 0 # sympy.pi / 5
scale = 1
# shift = - sympy.Matrix([1.5, 1.5])
shift = sympy.Matrix((0.0, 0.0))
if dtype == np.int32:
lenna_gpu = gpuarray.to_gpu(
np.ascontiguousarray(lenna * 255, dtype))
else:
lenna_gpu = gpuarray.to_gpu(
np.ascontiguousarray(lenna, dtype))
x_f, y_f = pystencils.fields(f'x,y: {type_map[dtype]} [2d]', ghost_layers=0)
if use_textures:
transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
else:
transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
})
# print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
# print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = gpuarray.zeros_like(lenna_gpu)
kernel(x=lenna_gpu, y=out)
pyconrad.imshow(out,
f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
@pytest.mark.parametrize('address_mode', ['border', 'clamp'])
def test_rotate_interpolation_size_change(address_mode):
"""
'wrap', 'mirror' currently fails on new sympy due to conjugate()
"""
x_f, y_f = pystencils.fields('x,y: float64 [2d]')
rotation_angle = sympy.pi / 5
transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
assignments = pystencils.AssignmentCollection({
y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
})
print(assignments)
ast = pystencils.create_kernel(assignments)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = np.zeros((100, 150), np.float64)
kernel(x=lenna, y=out)
pyconrad.imshow(out, "small out " + address_mode)
@pytest.mark.parametrize('address_mode, target',
itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu']))
def test_field_interpolated(address_mode, target):
x_f, y_f = pystencils.fields('x,y: float64 [2d]')
assignments = pystencils.AssignmentCollection({
y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode)
})
print(assignments)
ast = pystencils.create_kernel(assignments, target=target)
print(ast)
print(pystencils.show_code(ast))
kernel = ast.compile()
out = np.zeros_like(lenna)
kernel(x=lenna, y=out)
pyconrad.imshow(out, "out " + address_mode)
def test_spatial_derivative():
x, y = pystencils.fields('x, y: float32[2d]')
tx, ty = pystencils.fields('t_x, t_y: float32[2d]')
diff = sympy.diff(x.interpolated_access((tx.center, ty.center)), tx.center)
print("diff: " + str(diff))
%% Cell type:code id: tags:
``` python
from pystencils.session import *
```
%% Cell type:code id: tags:
``` python
dh = ps.create_data_handling(domain_size=(256, 256), periodicity=True)
c_field = dh.add_array('c')
dh.fill("c", 0.0, ghost_layers=True)
```
%% Cell type:code id: tags:
``` python
for x in range(129):
for y in range(258):
dh.cpu_arrays['c'][x, y] = 1.0
```
%% Cell type:code id: tags:
``` python
plt.scalar_field(dh.cpu_arrays["c"])
```
%% Output
<matplotlib.image.AxesImage at 0x7ff5b00707c0>
%% Cell type:code id: tags:
``` python
ur = ps.Assignment(c_field[0, 0], c_field[1, 0])
ast = ps.create_kernel(ur, target=dh.default_target, cpu_openmp=True)
kernel = ast.compile()
```
%% Cell type:code id: tags:
``` python
c_sync = dh.synchronization_function(['c'])
```
%% Cell type:code id: tags:
``` python
def timeloop(steps=10):
for i in range(steps):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('video')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('image_update')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
%% Cell type:code id: tags:
``` python
def grid_update_function(image):
for i in range(40):
c_sync()
dh.run_kernel(kernel)
return dh.gather_array('c')
```
%% Cell type:code id: tags:
``` python
animation = ps.jupyter.make_imshow_animation(dh.cpu_arrays["c"], grid_update_function, frames=300)
```
%% Output
import numpy as np
import pytest
import sympy as sp
from pathlib import Path
from kerncraft.kernel import KernelCode
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, ECMData, Benchmark
from pystencils import Assignment, Field, fields
from pystencils.cpu import create_kernel
from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
from pystencils.timeloop import TimeLoop
SCRIPT_FOLDER = Path(__file__).parent
INPUT_FOLDER = SCRIPT_FOLDER / "kerncraft_inputs"
@pytest.mark.kerncraft
def test_compilation():
machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
machine = MachineModel(path_to_yaml=machine_file_path)
kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
with open(kernel_file_path) as kernel_file:
reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
reference_kernel.get_kernel_header(name='test_kernel')
reference_kernel.get_kernel_code(name='test_kernel')
reference_kernel.get_main_code(kernel_function_name='test_kernel')
size = [30, 50, 3]
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
s = sp.Symbol("s")
rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
update_rule = Assignment(b[0, 0], s * rhs)
ast = create_kernel([update_rule])
mine = generate_benchmark(ast, likwid=False)
print(mine)
@pytest.mark.kerncraft
def analysis(kernel, machine, model='ecmdata'):
if model == 'ecmdata':
model = ECMData(kernel, machine, KerncraftParameters())
elif model == 'ecm':
model = ECM(kernel, machine, KerncraftParameters())
# model.analyze()
# model.plot()
elif model == 'benchmark':
model = Benchmark(kernel, machine, KerncraftParameters())
else:
model = ECM(kernel, machine, KerncraftParameters())
model.analyze()
return model
@pytest.mark.kerncraft
def test_3d_7pt_osaca():
size = [20, 200, 200]
kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
machine_model = MachineModel(path_to_yaml=machine_file_path)
with open(kernel_file_path) as kernel_file:
reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path)
reference_kernel.set_constant('M', size[0])
reference_kernel.set_constant('N', size[1])
assert size[1] == size[2]
analysis(reference_kernel, machine_model, model='ecm')
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
s = sp.Symbol("s")
rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
k = PyStencilsKerncraftKernel(ast, machine=machine_model)
analysis(k, machine_model, model='ecm')
assert reference_kernel._flops == k._flops
# assert reference.results['cl throughput'] == analysis.results['cl throughput']
@pytest.mark.kerncraft
def test_2d_5pt():
machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
machine = MachineModel(path_to_yaml=machine_file_path)
size = [30, 50, 3]
kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
with open(kernel_file_path) as kernel_file:
reference_kernel = KernelCode(kernel_file.read(), machine=machine,
filename=kernel_file_path)
reference = analysis(reference_kernel, machine)
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
s = sp.Symbol("s")
rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
update_rule = Assignment(b[0, 0], s * rhs)
ast = create_kernel([update_rule])
k = PyStencilsKerncraftKernel(ast, machine)
result = analysis(k, machine)
for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
assert e1 == e2
@pytest.mark.kerncraft
def test_3d_7pt():
machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
machine = MachineModel(path_to_yaml=machine_file_path)
size = [30, 50, 50]
kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
with open(kernel_file_path) as kernel_file:
reference_kernel = KernelCode(kernel_file.read(), machine=machine,
filename=kernel_file_path)
reference_kernel.set_constant('M', size[0])
reference_kernel.set_constant('N', size[1])
assert size[1] == size[2]
reference = analysis(reference_kernel, machine)
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
s = sp.Symbol("s")
rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
k = PyStencilsKerncraftKernel(ast, machine)
result = analysis(k, machine)
for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
assert e1 == e2
@pytest.mark.kerncraft
def test_benchmark():
size = [30, 50, 50]
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
s = sp.Symbol("s")
rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)
kernel = ast.compile()
a = np.full(size, fill_value=0.23)
b = np.full(size, fill_value=0.23)
timeloop = TimeLoop(steps=1)
timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23})
timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)
import pytest
try:
from pystencils.llvm.llvmjit import generate_and_jit
from pystencils.llvm import create_kernel, make_python_function
from pystencils.cpu.cpujit import get_llc_command
from pystencils import Assignment, Field, show_code
import numpy as np
import sympy as sp
except ModuleNotFoundError:
pytest.importorskip("llvmlite")
def test_jacobi_fixed_field_size():
size = (30, 20)
src_field_llvm = np.random.rand(*size)
src_field_py = np.copy(src_field_llvm)
dst_field_llvm = np.zeros(size)
dst_field_py = np.zeros(size)
f = Field.create_from_numpy_array("f", src_field_llvm)
d = Field.create_from_numpy_array("d", dst_field_llvm)
jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
ast = create_kernel([jacobi])
for x in range(1, size[0] - 1):
for y in range(1, size[1] - 1):
dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
src_field_py[x, y - 1] + src_field_py[x, y + 1])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
error = np.sum(np.abs(dst_field_py - dst_field_llvm))
np.testing.assert_almost_equal(error, 0.0)
@pytest.mark.skipif(not get_llc_command(), reason="Tests requires llc in $PATH")
def test_jacobi_fixed_field_size_gpu():
size = (30, 20)
import pycuda.autoinit # noqa
from pycuda.gpuarray import to_gpu
src_field_llvm = np.random.rand(*size)
src_field_py = np.copy(src_field_llvm)
dst_field_llvm = np.zeros(size)
dst_field_py = np.zeros(size)
f = Field.create_from_numpy_array("f", src_field_py)
d = Field.create_from_numpy_array("d", dst_field_py)
src_field_llvm = to_gpu(src_field_llvm)
dst_field_llvm = to_gpu(dst_field_llvm)
jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
ast = create_kernel([jacobi], target='gpu')
show_code(ast)
for x in range(1, size[0] - 1):
for y in range(1, size[1] - 1):
dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
src_field_py[x, y - 1] + src_field_py[x, y + 1])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
error = np.sum(np.abs(dst_field_py - dst_field_llvm.get()))
np.testing.assert_almost_equal(error, 0.0)
def test_jacobi_variable_field_size():
size = (3, 3, 3)
f = Field.create_generic("f", 3)
d = Field.create_generic("d", 3)
jacobi = Assignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
ast = create_kernel([jacobi])
src_field_llvm = np.random.rand(*size)
src_field_py = np.copy(src_field_llvm)
dst_field_llvm = np.zeros(size)
dst_field_py = np.zeros(size)
for x in range(1, size[0] - 1):
for y in range(1, size[1] - 1):
for z in range(1, size[2] - 1):
dst_field_py[x, y, z] = 0.25 * (src_field_py[x - 1, y, z] + src_field_py[x + 1, y, z] +
src_field_py[x, y - 1, z] + src_field_py[x, y + 1, z])
kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
kernel()
error = np.sum(np.abs(dst_field_py - dst_field_llvm))
np.testing.assert_almost_equal(error, 0.0)
def test_pow_llvm():
size = (30, 20)
src_field_llvm = 4 * np.ones(size)
dst_field_llvm = np.zeros(size)
f = Field.create_from_numpy_array("f", src_field_llvm)
d = Field.create_from_numpy_array("d", dst_field_llvm)
ur = Assignment(d[0, 0], sp.Pow(f[0, 0], -1.0))
ast = create_kernel([ur])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
assert np.all(0.25 == dst_field_llvm)
ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 0.5))
ast = create_kernel([ur])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
assert np.all(2.0 == dst_field_llvm)
ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 2.0))
ast = create_kernel([ur])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
assert np.all(16.0 == dst_field_llvm)
ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 3.0))
ast = create_kernel([ur])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
assert np.all(64.0 == dst_field_llvm)
ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 4.0))
ast = create_kernel([ur])
jit = generate_and_jit(ast)
jit('kernel', dst_field_llvm, src_field_llvm)
assert np.all(256.0 == dst_field_llvm)
if __name__ == "__main__":
test_jacobi_fixed_field_size_gpu()
import numpy as np
import pytest
import sympy as sp
import pystencils
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function
try:
import pyopencl as cl
HAS_OPENCL = True
import pystencils.opencl.autoinit
except Exception:
HAS_OPENCL = False
def test_print_opencl():
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
pystencils.show_code(ast, custom_backend=CudaBackend())
opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend())
print(opencl_code)
assert "__global double * RESTRICT const _data_x" in str(opencl_code)
assert "__global double * RESTRICT" in str(opencl_code)
assert "get_local_id(0)" in str(opencl_code)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
code = pystencils.show_code(ast, custom_backend=CudaBackend())
print(code)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
pystencils.show_code(ast, custom_backend=CudaBackend())
pystencils.show_code(ast, custom_backend=OpenClBackend())
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
pytest.importorskip('pycuda')
z, y, x = pystencils.fields("z, y, x: [2d]")
a = sp.Symbol('a')
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
code = pystencils.show_code(ast, custom_backend=CudaBackend())
print(code)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
cuda_kernel = ast.compile()
assert cuda_kernel is not None
import pycuda.gpuarray as gpuarray
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
x = gpuarray.to_gpu(x_cpu)
y = gpuarray.to_gpu(y_cpu)
z = gpuarray.to_gpu(z_cpu)
cuda_kernel(x=x, y=y, z=z, a=5.)
result_cuda = z.get()
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z, a=5.)
result_opencl = z.get(queue)
assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_without_cuda():
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
ast = pystencils.create_kernel(assignments, target='gpu')
print(ast)
opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code)
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
import pyopencl.array as array
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
x = array.to_device(queue, x_cpu)
y = array.to_device(queue, y_cpu)
z = array.to_device(queue, z_cpu)
opencl_kernel = make_python_function(ast, queue, ctx)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_kernel_creation():
global pystencils
z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})
print(assignments)
import pystencils.opencl.autoinit
ast = pystencils.create_kernel(assignments, target='opencl')
print(ast.backend)
code = pystencils.get_code_str(ast)
print(code)
assert 'get_local_size' in code
opencl_kernel = ast.compile()
x_cpu = np.random.rand(20, 30)
y_cpu = np.random.rand(20, 30)
z_cpu = np.random.rand(20, 30)
import pyopencl.array as array
assert get_global_cl_queue()
x = array.to_device(get_global_cl_queue(), x_cpu)
y = array.to_device(get_global_cl_queue(), y_cpu)
z = array.to_device(get_global_cl_queue(), z_cpu)
assert opencl_kernel is not None
opencl_kernel(x=x, y=y, z=z)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('pycuda')
```
%% Cell type:code id: tags:
``` python
from pystencils.session import *
sp.init_printing()
frac = sp.Rational
```
%% Cell type:markdown id: tags:
# Phase-field simulation of dentritic solidification in 3D
This notebook tests the model presented in the dentritic growth tutorial in 3D.
%% Cell type:code id: tags:
``` python
target = 'gpu'
gpu = target == 'gpu'
domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
φ_field = dh.add_array('phi', latex_name='φ')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
```
%% Cell type:code id: tags:
``` python
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
φ = φ_field.center
T = t_field.center
d = ps.fd.Diff
def f(φ, m):
return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
bulk_free_energy_density = f(φ, m)
interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
```
%% Cell type:markdown id: tags:
Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
%% Cell type:code id: tags:
``` python
n = sp.Matrix([d(φ, i) for i in range(3)])
nLen = sp.sqrt(sum(n_i**2 for n_i in n))
n = n / nLen
nVal = sum(n_i**4 for n_i in n)
σ = δ * nVal
εVal = εb * (1 + σ)
εVal
```
%% Output
$\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
⎛ ⎛ 4
⎜ ⎜ D(φ[0,0,0])
\bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
⎜ ⎜ 2
⎜ ⎜⎛ 2 2 2⎞ ⎛
⎝ ⎝⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]
4 4
D(φ[0,0,0]) D(φ[0,0,0])
────────────────────────────────── + ─────────────────────────────────────────
2
2 2 2⎞ ⎛ 2 2
) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]
⎞ ⎞
⎟ ⎟
────⎟ + 1⎟
2⎟ ⎟
2⎞ ⎟ ⎟
) ⎠ ⎠ ⎠
%% Cell type:code id: tags:
``` python
def m_func(temperature):
return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
```
%% Cell type:code id: tags:
``` python
substitutions = {m: m_func(T),
ε: εVal}
fe_i = interface_free_energy_density.subs(substitutions)
fe_b = bulk_free_energy_density.subs(substitutions)
μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
```
%% Cell type:code id: tags:
``` python
dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
```
%% Cell type:code id: tags:
``` python
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.3,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
sp.pi: sp.pi.evalf()
}
parameters
```
%% Output
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.3, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
%% Cell type:code id: tags:
``` python
dφ_dt = - dF_dφ / τ
assignments = [
ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
]
φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperatureEqs = [
ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
]
```
%% Cell type:code id: tags:
``` python
temperatureEqs
```
%% Output
$\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
[T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
_B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
%% Cell type:code id: tags:
``` python
φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
```
%% Cell type:code id: tags:
``` python
def time_loop(steps):
φ_sync = dh.synchronization_function(['phi'], target=target)
temperature_sync = dh.synchronization_function(['T'], target=target)
dh.all_to_gpu()
for t in range(steps):
φ_sync()
dh.run_kernel(φ_kernel)
temperature_sync()
dh.run_kernel(temperatureKernel)
dh.all_to_cpu()
def init(nucleus_size=np.sqrt(5)):
for b in dh.iterate():
x, y, z = b.cell_index_arrays
x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
b['phi'].fill(0)
b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
b['T'].fill(0.0)
def plot(slice_obj=ps.make_slice[:, :, 0.5]):
plt.subplot(1, 3, 1)
plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
plt.title("φ")
plt.colorbar()
plt.subplot(1, 3, 2)
plt.title("T")
plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
plt.colorbar()
plt.subplot(1, 3, 3)
plt.title("∂φ")
plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
plt.colorbar()
```
%% Cell type:code id: tags:
``` python
init()
plot()
print(dh)
```
%% Output
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phidelta| ( 0, 0)| ( 0, 0)
%% Cell type:code id: tags:
``` python
if 'is_test_run' in globals():
time_loop(2)
assert np.isfinite(dh.max('phi'))
assert np.isfinite(dh.max('T'))
assert np.isfinite(dh.max('phidelta'))
else:
from time import perf_counter
vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
last = perf_counter()
for i in range(300):
time_loop(100)
vtk_writer(i)
print("Step ", i, perf_counter() - last, dh.max('phi'))
last = perf_counter()
```
import pytest
import pystencils
from sympy import oo
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', ('cpu', 'gpu'))
def test_print_infinity(type, negative, target):
x = pystencils.fields(f'x: {type}[1d]')
if negative:
assignment = pystencils.Assignment(x.center, -oo)
else:
assignment = pystencils.Assignment(x.center, oo)
ast = pystencils.create_kernel(assignment, data_type=type, target=target)
if target == 'gpu':
pytest.importorskip('pycuda')
ast.compile()
print(ast.compile().code)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import pytest
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
def __init__(self):
super().__init__()
def test_print_unsupported_node():
with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
CBackend()(UnsupportedNode())
import numpy as np
import pytest
import pystencils as ps
from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles
# curand_Philox4x32_10(make_uint4(124, i, j, 0), make_uint2(0, 0))
philox_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
[1032407765, 970978240, 2217005168, 2424826293]],
[[2958765206, 3725192638, 2623672781, 1373196132],
[ 850605163, 1694561295, 3285694973, 2799652583]]])
def test_philox_double():
for target in ('cpu', 'gpu'):
if target == 'gpu':
pytest.importorskip('pycuda')
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
f = dh.add_array("f", values_per_cell=2)
dh.fill('f', 42.0)
philox_node = PhiloxTwoDoubles(dh.dim)
assignments = [philox_node,
ps.Assignment(f(0), philox_node.result_symbols[0]),
ps.Assignment(f(1), philox_node.result_symbols[1])]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu()
arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all()
x = philox_reference[:,:,0::2]
y = philox_reference[:,:,1::2]
z = x ^ y << (53 - 32)
double_reference = z * 2.**-53 + 2.**-54
assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps))
def test_philox_float():
for target in ('cpu', 'gpu'):
if target == 'gpu':
pytest.importorskip('pycuda')
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
f = dh.add_array("f", values_per_cell=4)
dh.fill('f', 42.0)
philox_node = PhiloxFourFloats(dh.dim)
assignments = [philox_node] + [ps.Assignment(f(i), philox_node.result_symbols[i]) for i in range(4)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu()
arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all()
float_reference = philox_reference * 2.**-32 + 2.**-33
assert(np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps))
def test_aesni_double():
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
f = dh.add_array("f", values_per_cell=2)
dh.fill('f', 42.0)
aesni_node = AESNITwoDoubles(dh.dim)
assignments = [aesni_node,
ps.Assignment(f(0), aesni_node.result_symbols[0]),
ps.Assignment(f(1), aesni_node.result_symbols[1])]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu()
arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all()
def test_aesni_float():
dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
f = dh.add_array("f", values_per_cell=4)
dh.fill('f', 42.0)
aesni_node = AESNIFourFloats(dh.dim)
assignments = [aesni_node] + [ps.Assignment(f(i), aesni_node.result_symbols[i]) for i in range(4)]
kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
dh.all_to_gpu()
dh.run_kernel(kernel, time_step=124)
dh.all_to_cpu()
arr = dh.gather_array('f')
assert np.logical_and(arr <= 1.0, arr >= 0).all()
\ No newline at end of file
import numpy as np
import pytest
from pystencils import Assignment, Field
try:
from pystencils.llvm import create_kernel, make_python_function
except ModuleNotFoundError:
pytest.importorskip("llvmlite")
def test_size_check():
"""Kernel with two fixed-sized fields creating with same size but calling with wrong size"""
src = np.zeros((20, 21, 9))
dst = np.zeros_like(src)
sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
update_rule = Assignment(sym_dst(0),
sym_src[-1, 1](1) + sym_src[1, -1](2))
ast = create_kernel([update_rule])
func = make_python_function(ast)
# change size of src field
new_shape = [a - 7 for a in src.shape]
src = np.zeros(new_shape)
dst = np.zeros(new_shape)
try:
func(src=src, dst=dst)
assert False, "Expected ValueError because fields with different sized where passed"
except ValueError:
pass
def test_fixed_size_mismatch_check():
"""Create kernel with two differently sized but constant fields """
src = np.zeros((20, 21, 9))
dst = np.zeros((21, 21, 9))
sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
update_rule = Assignment(sym_dst(0),
sym_src[-1, 1](1) + sym_src[1, -1](2))
try:
create_kernel([update_rule])
assert False, "Expected ValueError because fields with different sized where passed"
except ValueError:
pass
def test_fixed_and_variable_field_check():
"""Create kernel with two variable sized fields - calling them with different sizes"""
src = np.zeros((20, 21, 9))
sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
update_rule = Assignment(sym_dst(0),
sym_src[-1, 1](1) + sym_src[1, -1](2))
try:
create_kernel([update_rule])
assert False, "Expected ValueError because fields with different sized where passed"
except ValueError:
pass
def test_two_variable_shaped_fields():
src = np.zeros((20, 21, 9))
dst = np.zeros((22, 21, 9))
sym_src = Field.create_generic("src", spatial_dimensions=2, index_dimensions=1)
sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
update_rule = Assignment(sym_dst(0),
sym_src[-1, 1](1) + sym_src[1, -1](2))
ast = create_kernel([update_rule])
func = make_python_function(ast)
try:
func(src=src, dst=dst)
assert False, "Expected ValueError because fields with different sized where passed"
except ValueError:
pass
import numpy as np
import sympy as sp
from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
from pystencils.simp import sympy_cse_on_assignment_list
def test_sliced_iteration():
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
a, b = sp.symbols("a b")
update_rule = Assignment(dst_field[0, 0],
(a * src_field[0, 1] + a * src_field[0, -1] +
b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
def test_sliced_iteration_llvm():
import pytest
pytest.importorskip("llvmlite")
size = (4, 4)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
a, b = sp.symbols("a b")
update_rule = Assignment(dst_field[0, 0],
(a * src_field[0, 1] + a * src_field[0, -1] +
b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
x_end = TypedSymbol("x_end", "int")
s = make_slice[1:x_end, 1]
x_end_value = size[1] - 1
import pystencils.llvm as llvm_generator
ast = llvm_generator.create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s)
kernel = llvm_generator.make_python_function(ast)
kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
expected_result = np.zeros(size)
expected_result[1:x_end_value, 1] = 1
np.testing.assert_almost_equal(expected_result, dst_arr)
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from time import perf_counter
from statistics import median
from functools import partial
```
%% Cell type:markdown id: tags:
## Benchmark for Python call overhead
%% Cell type:code id: tags:
``` python
inner_repeats = 100
outer_repeats = 5
sizes = [2**i for i in range(1, 8)]
sizes
```
%% Output
$$\left [ 2, \quad 4, \quad 8, \quad 16, \quad 32, \quad 64, \quad 128\right ]$$
[2, 4, 8, 16, 32, 64, 128]
%% Cell type:code id: tags:
``` python
def benchmark_pure(domain_size, extract_first=False):
src = np.zeros(domain_size)
dst = np.zeros_like(src)
f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
if extract_first:
kernel = kernel.kernel
start = perf_counter()
for i in range(inner_repeats):
kernel(src=src, dst=dst)
src, dst = dst, src
end = perf_counter()
else:
start = perf_counter()
for i in range(inner_repeats):
kernel(src=src, dst=dst)
src, dst = dst, src
end = perf_counter()
return (end - start) / inner_repeats
def benchmark_datahandling(domain_size, parallel=False):
dh = ps.create_data_handling(domain_size, parallel=parallel)
f_src = dh.add_array('src')
f_dst = dh.add_array('dst')
kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
start = perf_counter()
for i in range(inner_repeats):
dh.run_kernel(kernel)
dh.swap('src', 'dst')
end = perf_counter()
return (end - start) / inner_repeats
name_to_func = {
'pure_extract': partial(benchmark_pure, extract_first=True),
'pure_no_extract': partial(benchmark_pure, extract_first=False),
'dh_serial': partial(benchmark_datahandling, parallel=False),
'dh_parallel': partial(benchmark_datahandling, parallel=True),
}
```
%% Cell type:code id: tags:
``` python
result = {'block_size': [],
'name': [],
'time': []}
for bs in sizes:
print("Computing size ", bs)
for name, func in name_to_func.items():
for i in range(outer_repeats):
time = func((bs, bs))
result['block_size'].append(bs)
result['name'].append(name)
result['time'].append(time)
```
%% Output
Computing size 2
Computing size 4
Computing size 8
Computing size 16
Computing size 32
Computing size 64
Computing size 128
%% Cell type:code id: tags:
``` python
if 'is_test_run' not in globals():
import pandas as pd
import seaborn as sns
data = pd.DataFrame.from_dict(result)
plt.subplot(1,2,1)
sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
plt.yscale('log')
plt.subplot(1,2,2)
data = pd.DataFrame.from_dict(result)
sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
```
%% Output
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import numpy as np
import sympy
from sympy.abc import k
import pystencils
from pystencils.data_types import create_type
def test_sum():
sum = sympy.Sum(k, (k, 1, 100))
expanded_sum = sum.doit()
print(sum)
print(expanded_sum)
x = pystencils.fields('x: float32[1d]')
assignments = pystencils.AssignmentCollection({
x.center(): sum
})
ast = pystencils.create_kernel(assignments)
code = str(pystencils.get_code_obj(ast))
kernel = ast.compile()
print(code)
assert 'double sum' in code
array = np.zeros((10,), np.float32)
kernel(x=array)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
def test_sum_use_float():
sum = sympy.Sum(k, (k, 1, 100))
expanded_sum = sum.doit()
print(sum)
print(expanded_sum)
x = pystencils.fields('x: float32[1d]')
assignments = pystencils.AssignmentCollection({
x.center(): sum
})
ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
code = str(pystencils.get_code_obj(ast))
kernel = ast.compile()
print(code)
print(pystencils.get_code_obj(ast))
assert 'float sum' in code
array = np.zeros((10,), np.float32)
kernel(x=array)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
def test_product():
k = pystencils.TypedSymbol('k', create_type('int64'))
sum = sympy.Product(k, (k, 1, 10))
expanded_sum = sum.doit()
print(sum)
print(expanded_sum)
x = pystencils.fields('x: int64[1d]')
assignments = pystencils.AssignmentCollection({
x.center(): sum
})
ast = pystencils.create_kernel(assignments)
code = pystencils.get_code_str(ast)
kernel = ast.compile()
print(code)
assert 'int64_t product' in code
array = np.zeros((10,), np.int64)
kernel(x=array)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
def test_prod_var_limit():
k = pystencils.TypedSymbol('k', create_type('int64'))
limit = pystencils.TypedSymbol('limit', create_type('int64'))
sum = sympy.Sum(k, (k, 1, limit))
expanded_sum = sum.replace(limit, 100).doit()
print(sum)
print(expanded_sum)
x = pystencils.fields('x: int64[1d]')
assignments = pystencils.AssignmentCollection({
x.center(): sum
})
ast = pystencils.create_kernel(assignments)
pystencils.show_code(ast)
kernel = ast.compile()
array = np.zeros((10,), np.int64)
kernel(x=array, limit=100)
assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
import pytest
import sympy as sp
import pystencils
from pystencils.math_optimizations import HAS_REWRITING, optimize_assignments, optims_pystencils_cpu
@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
def test_sympy_optimizations():
for target in ('cpu', 'gpu'):
x, y, z = pystencils.fields('x, y, z: float32[2d]')
# Triggers Sympy's expm1 optimization
# Sympy's expm1 optimization is tedious to use and the behaviour is highly depended on the sympy version. In
# some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
# for sympy to work properly ...
assignments = pystencils.AssignmentCollection({
x[0, 0]: 1.0 * (sp.exp(y[0, 0]) - 1)
})
assignments = optimize_assignments(assignments, optims_pystencils_cpu)
print(assignments)
ast = pystencils.create_kernel(assignments, target=target)
code = pystencils.get_code_str(ast)
assert 'expm1(' in code
@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
def test_evaluate_constant_terms():
for target in ('cpu', 'gpu'):
x, y, z = pystencils.fields('x, y, z: float32[2d]')
# Triggers Sympy's cos optimization
assignments = pystencils.AssignmentCollection({
x[0, 0]: -sp.cos(1) + y[0, 0]
})
assignments = optimize_assignments(assignments, optims_pystencils_cpu)
ast = pystencils.create_kernel(assignments, target=target)
code = pystencils.get_code_str(ast)
assert 'cos(' not in code
print(code)
@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
def test_do_not_evaluate_constant_terms():
optimizations = pystencils.math_optimizations.optims_pystencils_cpu
optimizations.remove(pystencils.math_optimizations.evaluate_constant_terms)
for target in ('cpu', 'gpu'):
x, y, z = pystencils.fields('x, y, z: float32[2d]')
assignments = pystencils.AssignmentCollection({
x[0, 0]: -sp.cos(1) + y[0, 0]
})
ast = pystencils.create_kernel(assignments, target=target)
code = pystencils.get_code_str(ast)
assert 'cos(' in code
print(code)
from sympy.abc import a, b, c, d, e, f
import pystencils
from pystencils.data_types import cast_func, create_type
def test_type_interference():
x = pystencils.fields('x: float32[3d]')
assignments = pystencils.AssignmentCollection({
a: cast_func(10, create_type('float64')),
b: cast_func(10, create_type('uint16')),
e: 11,
c: b,
f: c + b,
d: c + b + x.center + e,
x.center: c + b + x.center
})
ast = pystencils.create_kernel(assignments)
code = str(pystencils.get_code_str(ast))
assert 'double a' in code
assert 'uint16_t b' in code
assert 'uint16_t f' in code
assert 'int64_t e' in code
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils import data_types
from pystencils.data_types import TypedSymbol, get_type_of_expression, VectorType, collate_types, create_type
def test_parsing():
assert str(data_types.create_composite_type_from_string("const double *")) == "double const *"
assert str(data_types.create_composite_type_from_string("double const *")) == "double const *"
t1 = data_types.create_composite_type_from_string("const double * const * const restrict")
t2 = data_types.create_composite_type_from_string(str(t1))
assert t1 == t2
def test_collation():
double_type = create_type("double")
float_type = create_type("float32")
double4_type = VectorType(double_type, 4)
float4_type = VectorType(float_type, 4)
assert collate_types([double_type, float_type]) == double_type
assert collate_types([double4_type, float_type]) == double4_type
assert collate_types([double4_type, float4_type]) == double4_type
def test_dtype_of_constants():
# Some come constants are neither of type Integer,Float,Rational and don't have args
# >>> isinstance(pi, Integer)
# False
# >>> isinstance(pi, Float)
# False
# >>> isinstance(pi, Rational)
# False
# >>> pi.args
# ()
get_type_of_expression(sp.pi)
def test_assumptions():
x = ps.fields('x: float32[3d]')
assert x.shape[0].is_nonnegative
assert (2 * x.shape[0]).is_nonnegative
assert (2 * x.shape[0]).is_integer
assert (TypedSymbol('a', create_type('uint64'))).is_nonnegative
assert (TypedSymbol('a', create_type('uint64'))).is_positive is None
assert (TypedSymbol('a', create_type('uint64')) + 1).is_positive
assert (x.shape[0] + 1).is_real
def test_sqrt_of_integer():
"""Regression test for bug where sqrt(3) was classified as integer"""
f = ps.fields("f: [1D]")
tmp = sp.symbols("tmp")
assignments = [ps.Assignment(tmp, sp.sqrt(3)),
ps.Assignment(f[0], tmp)]
arr = np.array([1], dtype=np.float64)
kernel = ps.create_kernel(assignments).compile()
kernel(f=arr)
assert 1.7 < arr[0] < 1.8
[pytest] [pytest]
testpaths = src tests doc/notebooks
pythonpath = src
python_files = test_*.py *_test.py scenario_*.py python_files = test_*.py *_test.py scenario_*.py
norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
addopts = --doctest-modules --durations=20 --cov-config pytest.ini addopts = --doctest-modules --durations=20 --cov-config pytest.ini
markers = markers =
kerncraft: tests depending on kerncraft longrun: tests only run at night since they have large execution time
notebook: mark for notebooks notebook: mark for notebooks
# these warnings all come from third party libraries.
filterwarnings =
ignore:an integer is required:DeprecationWarning
ignore:\s*load will be removed, use:PendingDeprecationWarning
ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
ignore:Animation was deleted without rendering anything:UserWarning
[run] [run]
branch = True branch = True
source = pystencils source = src/pystencils
pystencils_tests tests
omit = doc/* omit = doc/*
pystencils_tests/* tests/*
setup.py setup.py
quicktest.py
conftest.py conftest.py
pystencils/jupytersetup.py versioneer.py
pystencils/cpu/msvc_detection.py src/pystencils/jupytersetup.py
pystencils/sympy_gmpy_bug_workaround.py src/pystencils/cpu/msvc_detection.py
pystencils/cache.py src/pystencils/sympy_gmpy_bug_workaround.py
pystencils/pacxx/benchmark.py src/pystencils/cache.py
src/pystencils/pacxx/benchmark.py
src/pystencils/_version.py
venv/
[report] [report]
exclude_lines = exclude_lines =
......
#!/usr/bin/env python3
from contextlib import redirect_stdout
import io
from tests.test_quicktests import (
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
)
quick_tests = [
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
]
if __name__ == "__main__":
print("Running pystencils quicktests")
for qt in quick_tests:
print(f" -> {qt.__name__}")
with redirect_stdout(io.StringIO()):
qt()
import distutils from setuptools import setup, __version__ as setuptools_version
import io
import os
import sys
from contextlib import redirect_stdout
from importlib import import_module
import setuptools if int(setuptools_version.split('.')[0]) < 61:
raise Exception(
"[ERROR] pystencils requires at least setuptools version 61 to install.\n"
"If this error occurs during an installation via pip, it is likely that there is a conflict between "
"versions of setuptools installed by pip and the system package manager. "
"In this case, it is recommended to install pystencils into a virtual environment instead."
)
try: import versioneer
import cython # noqa
USE_CYTHON = True
except ImportError:
USE_CYTHON = False
quick_tests = [
'test_datahandling.test_kernel',
'test_blocking_staggered.test_blocking_staggered',
'test_blocking_staggered.test_blocking_staggered',
'test_vectorization.test_vectorization_variable_size',
]
def get_cmdclass():
return versioneer.get_cmdclass()
class SimpleTestRunner(distutils.cmd.Command):
"""A custom command to run selected tests"""
description = 'run some quick tests' setup(
user_options = [] version=versioneer.get_version(),
cmdclass=get_cmdclass(),
@staticmethod )
def _run_tests_in_module(test):
"""Short test runner function - to work also if py.test is not installed."""
test = 'pystencils_tests.' + test
mod, function_name = test.rsplit('.', 1)
if isinstance(mod, str):
mod = import_module(mod)
func = getattr(mod, function_name)
print(" -> %s in %s" % (function_name, mod.__name__))
with redirect_stdout(io.StringIO()):
func()
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
"""Run command."""
for test in quick_tests:
self._run_tests_in_module(test)
def readme():
with open('README.md') as f:
return f.read()
def cython_extensions(*extensions):
from distutils.extension import Extension
ext = '.pyx'
result = [Extension(e, [e.replace('.', '/') + ext]) for e in extensions]
from Cython.Build import cythonize
return cythonize(result, language_level=3)
try:
sys.path.insert(0, os.path.abspath('doc'))
from version_from_git import version_number_from_git
version = version_number_from_git()
with open("RELEASE-VERSION", "w") as f:
f.write(version)
except ImportError:
version = open('RELEASE-VERSION', 'r').read()
setuptools.setup(name='pystencils',
description='Speeding up stencil computations on CPUs and GPUs',
version=version,
long_description=readme(),
long_description_content_type="text/markdown",
author='Martin Bauer',
license='AGPLv3',
author_email='martin.bauer@fau.de',
url='https://i10git.cs.fau.de/pycodegen/pystencils/',
packages=['pystencils'] + ['pystencils.' + s for s in setuptools.find_packages('pystencils')],
install_requires=['sympy>=1.1', 'numpy', 'appdirs', 'joblib'],
package_data={'pystencils': ['include/*.h',
'backends/cuda_known_functions.txt',
'backends/opencl1.1_known_functions.txt']},
ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython") if USE_CYTHON else [],
classifiers=[
'Development Status :: 4 - Beta',
'Framework :: Jupyter',
'Topic :: Software Development :: Code Generators',
'Topic :: Scientific/Engineering :: Physics',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
],
project_urls={
"Bug Tracker": "https://i10git.cs.fau.de/pycodegen/pystencils/issues",
"Documentation": "http://pycodegen.pages.walberla.net/pystencils/",
"Source Code": "https://i10git.cs.fau.de/pycodegen/pystencils",
},
extras_require={
'gpu': ['pycuda'],
'opencl': ['pyopencl'],
'alltrafos': ['islpy', 'py-cpuinfo'],
'bench_db': ['blitzdb', 'pymongo', 'pandas'],
'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich', 'graphviz'],
'autodiff': ['pystencils-autodiff'],
'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],
},
tests_require=['pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython'],
python_requires=">=3.6",
cmdclass={
'quicktest': SimpleTestRunner
},
)
"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions""" """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
from . import sympy_gmpy_bug_workaround # NOQA from .enums import Backend, Target
from . import fd from . import fd
from . import stencil as stencil from . import stencil as stencil
from .assignment import Assignment, assignment_from_stencil from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
from .data_types import TypedSymbol from .typing.typed_sympy import TypedSymbol
from .datahandling import create_data_handling from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .display_utils import show_code, get_code_obj, get_code_str, to_dot
from .field import Field, FieldType, fields from .field import Field, FieldType, fields
from .kernel_decorator import kernel from .config import CreateKernelConfig
from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel from .cache import clear_cache
from .kernel_decorator import kernel, kernel_config
from .kernelcreation import create_kernel, create_staggered_kernel
from .simp import AssignmentCollection from .simp import AssignmentCollection
from .slicing import make_slice from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator from .sympyextensions import SymbolCreator
from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector, from .datahandling import create_data_handling
y_, y_staggered, z_, z_staggered)
try:
import pystencils_autodiff
autodiff = pystencils_autodiff
except ImportError:
pass
__all__ = ['Field', 'FieldType', 'fields', __all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol', 'TypedSymbol',
'make_slice', 'make_slice',
'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel', 'CreateKernelConfig',
'create_kernel', 'create_staggered_kernel',
'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str', 'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection', 'AssignmentCollection',
'Assignment', 'Assignment', 'AddAugmentedAssignment',
'assignment_from_stencil', 'assignment_from_stencil',
'SymbolCreator', 'SymbolCreator',
'create_data_handling', 'create_data_handling',
'kernel', 'clear_cache',
'kernel', 'kernel_config',
'x_', 'y_', 'z_', 'x_', 'y_', 'z_',
'x_staggered', 'y_staggered', 'z_staggered', 'x_staggered', 'y_staggered', 'z_staggered',
'x_vector', 'x_staggered_vector', 'x_vector', 'x_staggered_vector',
'fd', 'fd',
'stencil'] 'stencil']
from . import _version
__version__ = _version.get_versions()['version']