Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 0 additions and 2160 deletions
import pytest
import pystencils as ps
import numpy as np
# This test aims to trigger deprecation warnings. Thus the warnings should not be displayed in the warning summary.
def test_create_kernel_backwards_compatibility():
    """Check that string targets still yield the same kernel as ``Target.CPU``.

    One Jacobi stencil is compiled three times: with the enum target, with
    ``target='cpu'`` and with ``CreateKernelConfig(target='cpu')``.  Both string
    variants must emit a ``DeprecationWarning`` and all three outputs must agree.
    """
    shape = (30, 20)
    input_string = np.random.rand(*shape)
    input_enum = np.copy(input_string)
    input_config = np.copy(input_string)
    output_string = np.zeros(shape)
    output_enum = np.zeros(shape)
    output_config = np.zeros(shape)

    f = ps.Field.create_from_numpy_array("f", input_enum)
    d = ps.Field.create_from_numpy_array("d", output_enum)
    neighbour_sum = f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]
    jacobi = ps.Assignment(d[0, 0], neighbour_sum / 4)

    kernel_enum = ps.create_kernel(jacobi, target=ps.Target.CPU).compile()
    with pytest.warns(DeprecationWarning):
        kernel_string = ps.create_kernel(jacobi, target='cpu').compile()
    # noinspection PyTypeChecker
    with pytest.warns(DeprecationWarning):
        string_config = ps.CreateKernelConfig(target='cpu')
        kernel_config = ps.create_kernel(jacobi, config=string_config).compile()

    kernel_enum(f=input_enum, d=output_enum)
    kernel_string(f=input_string, d=output_string)
    kernel_config(f=input_config, d=output_config)

    # The enum-target result is the reference for both deprecated variants.
    for other_output in (output_string, output_config):
        deviation = np.sum(np.abs(output_enum - other_output))
        np.testing.assert_almost_equal(deviation, 0.0)
import sympy
import pytest
import pystencils
from pystencils.astnodes import get_dummy_symbol
from pystencils.backends.cuda_backend import CudaSympyPrinter
from pystencils.data_types import address_of
from pystencils.enums import Target
def test_cuda_known_functions():
    """Create a GPU kernel using ``atomicAdd`` and ``rsqrtf`` and, if pycuda is present, compile it."""
    print(CudaSympyPrinter().known_functions)

    x, y = pystencils.fields('x,y: float32 [2d]')
    atomic_add = sympy.Function('atomicAdd')
    reciprocal_sqrt = sympy.Function('rsqrtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): atomic_add(address_of(y.center()), 2),
        y.center(): reciprocal_sqrt(x[0, 0]),
    })

    ast = pystencils.create_kernel(assignments, target=Target.GPU)
    # Kernel creation happens before the skip; only printing and compiling are gated on pycuda.
    pytest.importorskip('pycuda')
    pystencils.show_code(ast)
    compiled = ast.compile()
    assert compiled is not None
def test_cuda_but_not_c():
    """Create and print a CPU kernel whose assignments use ``atomicAdd`` and ``rsqrtf``."""
    x, y = pystencils.fields('x,y: float32 [2d]')
    atomic_add = sympy.Function('atomicAdd')
    reciprocal_sqrt = sympy.Function('rsqrtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): atomic_add(address_of(y.center()), 2),
        y.center(): reciprocal_sqrt(x[0, 0]),
    })
    cpu_ast = pystencils.create_kernel(assignments, target=Target.CPU)
    pystencils.show_code(cpu_ast)
def test_cuda_unknown():
    """Create and print a GPU kernel that calls the undefined function ``wtf``."""
    x, y = pystencils.fields('x,y: float32 [2d]')
    unknown_function = sympy.Function('wtf')
    assignments = pystencils.AssignmentCollection({
        get_dummy_symbol(): unknown_function(address_of(y.center()), 2),
    })
    gpu_ast = pystencils.create_kernel(assignments, target=Target.GPU)
    pystencils.show_code(gpu_ast)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('graphviz')
```
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.astnodes import Block, Conditional
```
%% Cell type:code id: tags:
``` python
# Two single-assignment blocks: dst takes src at offset -1 resp. +1 along the first axis.
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([ps.Assignment(dst[0, 0], src[-1, 0])])
false_block = Block([ps.Assignment(dst[0, 0], src[1, 0])])
# Kernel body: true_block unconditionally, then a Conditional on dst.center() > 0.0
# that selects true_block or false_block.
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
```
%% Cell type:code id: tags:
``` python
ps.to_dot(ast, graph_style={'size': "9.5,12.5"})
```
%% Output
<graphviz.files.Source at 0x7f62452c4110>
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from os.path import dirname, join
import numpy as np
import sympy
import pystencils
from pystencils.interpolation_astnodes import LinearInterpolator
# pyconrad is optional: if importing it fails for any reason, a MagicMock stands in,
# so the pyconrad.imshow(...) calls in the test still succeed.
try:
    import pyconrad.autoinit
except Exception:
    import unittest.mock
    pyconrad = unittest.mock.MagicMock()
# Test image expected next to this module under test_data/.
LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
# Load the image as float32 grayscale; on any failure (missing skimage, missing file, ...)
# fall back to a 20x30 random float32 array.
try:
    import skimage.io
    lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float32)
except Exception:
    lenna = np.random.rand(20, 30).astype(np.float32)
def test_rotate_center():
    """Rotate the test image about the field center and display it for three coordinate scalings."""
    x, y = pystencils.fields('x, y: float32[2d]')

    # Rotate around the center by setting both coordinate origins to the field centers
    x.set_coordinate_origin_to_field_center()
    y.set_coordinate_origin_to_field_center()

    rotation_angle = sympy.pi / 5
    transform_matrix = sympy.rot_axis3(rotation_angle)[:2, :2]
    # A generic matrix transform is inverted like this (for a rotation, transform_matrix.T would be more clever)
    inverse_matrix = transform_matrix.inv()

    def resample():
        # Build and run an interpolation kernel for the *current* coordinate transforms of x and y.
        source_index = x.physical_to_index(inverse_matrix @ y.physical_coordinates)
        rule = pystencils.AssignmentCollection({
            y.center(): LinearInterpolator(x).at(source_index)
        })
        compiled = pystencils.create_kernel(rule).compile()
        result = np.zeros_like(lenna)
        compiled(x=lenna, y=result)
        return result

    rotated = resample()
    pyconrad.imshow(lenna, "lenna")
    pyconrad.imshow(rotated, "rotated")

    # If distance in input field is twice as close we will see a smaller image
    x.coordinate_transform /= 2
    pyconrad.imshow(resample(), "rotated smaller")

    # Conversely, if output field has samples 3 times closer we will see a bigger image
    y.coordinate_transform /= 3
    pyconrad.imshow(resample(), "rotated bigger")

    # coordinate_transform can be any matrix, also with symbols as entries
def main():
    """Run the rotation test directly, without a test runner."""
    test_rotate_center()


# Script entry point
if __name__ == '__main__':
    main()
import numpy as np
from pystencils import Assignment, Field
from pystencils.cpu import create_indexed_kernel, make_python_function
def test_indexed_kernel():
    """Run an indexed CPU kernel that scatters the ``value`` entries of an index list into a field.

    Each index record carries an (x, y) cell and a value; after the kernel ran, the
    field must hold that value at that cell.
    """
    arr = np.zeros((3, 4))
    record_type = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
    index_arr = np.array([(0, 2, 3.0), (1, 3, 42.0), (2, 1, 5.0)], dtype=record_type)

    indexed_field = Field.create_from_numpy_array('index', index_arr)
    normal_field = Field.create_from_numpy_array('f', arr)
    update_rule = Assignment(normal_field[0, 0], indexed_field('value'))

    kernel = make_python_function(create_indexed_kernel([update_rule], [indexed_field]))
    kernel(f=arr, index=index_arr)

    for record in index_arr:
        np.testing.assert_allclose(arr[record['x'], record['y']], record['value'], atol=1e-13)
def test_indexed_cuda_kernel():
    """Run the indexed-kernel scatter test on the GPU.

    The test is skipped (instead of silently passing with a printed note) when
    pycuda cannot be imported, matching the other GPU tests.
    """
    # Local import: pytest is not among the module-level imports visible for this test module.
    import pytest
    pytest.importorskip('pycuda')

    from pystencils.gpucuda import make_python_function
    import pycuda.gpuarray as gpuarray
    from pystencils.gpucuda.kernelcreation import created_indexed_cuda_kernel

    arr = np.zeros((3, 4))
    dtype = np.dtype([('x', int), ('y', int), ('value', arr.dtype)])
    index_arr = np.zeros((3,), dtype=dtype)
    index_arr[0] = (0, 2, 3.0)
    index_arr[1] = (1, 3, 42.0)
    index_arr[2] = (2, 1, 5.0)

    indexed_field = Field.create_from_numpy_array('index', index_arr)
    normal_field = Field.create_from_numpy_array('f', arr)
    update_rule = Assignment(normal_field[0, 0], indexed_field('value'))

    ast = created_indexed_cuda_kernel([update_rule], [indexed_field])
    kernel = make_python_function(ast)

    gpu_arr = gpuarray.to_gpu(arr)
    gpu_index_arr = gpuarray.to_gpu(index_arr)
    kernel(f=gpu_arr, index=gpu_index_arr)
    # Copy the device result back into the host array before checking it.
    gpu_arr.get(arr)

    for i in range(index_arr.shape[0]):
        np.testing.assert_allclose(arr[index_arr[i]['x'], index_arr[i]['y']], index_arr[i]['value'], atol=1e-13)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import itertools
from os.path import dirname, join
import numpy as np
import pytest
import sympy
import pystencils
from pystencils.interpolation_astnodes import LinearInterpolator
from pystencils.spatial_coordinates import x_, y_
# Maps the numpy scalar types used in the parametrized tests to the dtype names
# placed into pystencils field description strings.
type_map = {
    np.float32: 'float32',
    np.float64: 'float64',
    np.int32: 'int32',
}
# pyconrad is optional: if importing it fails for any reason, a MagicMock stands in,
# so the pyconrad.imshow(...) calls in the tests still succeed.
try:
    import pyconrad.autoinit
except Exception:
    import unittest.mock
    pyconrad = unittest.mock.MagicMock()
# Test image expected next to this module under test_data/.
LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
# Load the image as float64 grayscale and show it; on any failure fall back to a
# 20x30 random array.
try:
    import skimage.io
    lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float64)
    pyconrad.imshow(lenna)
except Exception:
    lenna = np.random.rand(20, 30)
def test_interpolation():
    """Compile and run a kernel that samples ``x`` linearly at a constant offset of (2.7, 7.2)."""
    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
    shifted_position = [x_ + 2.7, y_ + 7.2]
    assignments = pystencils.AssignmentCollection({
        y_f.center(): LinearInterpolator(x_f).at(shifted_position)
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments)
    print(ast)
    print(pystencils.show_code(ast))
    compiled = ast.compile()

    pyconrad.imshow(lenna)
    out = np.zeros_like(lenna)
    compiled(x=lenna, y=out)
    pyconrad.imshow(out, "out")
def test_scale_interpolation():
    """Compile and run a scaled, shifted linear interpolation for every address mode."""
    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
    sample_position = [0.5 * x_ + 2.7, 0.25 * y_ + 7.2]

    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
        interpolator = LinearInterpolator(x_f, address_mode=address_mode)
        assignments = pystencils.AssignmentCollection({
            y_f.center(): interpolator.at(sample_position)
        })
        print(assignments)

        ast = pystencils.create_kernel(assignments)
        print(ast)
        print(pystencils.show_code(ast))
        compiled = ast.compile()

        out = np.zeros_like(lenna)
        compiled(x=lenna, y=out)
        pyconrad.imshow(out, "out " + address_mode)
@pytest.mark.parametrize('address_mode', ['border', 'clamp'])
def test_rotate_interpolation(address_mode):
    """Compile and run a rotated linear interpolation on the CPU.

    Only 'border' and 'clamp' are parametrized: 'wrap' and 'mirror' currently
    fail on new sympy due to conjugate().
    """
    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
    rotation_angle = sympy.pi / 5
    rotation = sympy.rot_axis3(rotation_angle)[:2, :2]
    transformed = rotation * sympy.Matrix((x_, y_))

    interpolator = LinearInterpolator(x_f, address_mode=address_mode)
    assignments = pystencils.AssignmentCollection({
        y_f.center(): interpolator.at(transformed)
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments)
    print(ast)
    print(pystencils.show_code(ast))
    compiled = ast.compile()

    out = np.zeros_like(lenna)
    compiled(x=lenna, y=out)
    pyconrad.imshow(out, "out " + address_mode)
@pytest.mark.parametrize('dtype', (np.int32, np.float32, np.float64))
@pytest.mark.parametrize('address_mode', ('border', 'wrap', 'clamp', 'mirror'))
@pytest.mark.parametrize('use_textures', ('use_textures', False))
def test_rotate_interpolation_gpu(dtype, address_mode, use_textures):
    """Compile and run a rotated, shifted linear interpolation on the GPU.

    Runs for every combination of data type, address mode and texture usage;
    skipped when pycuda is unavailable.
    """
    pytest.importorskip('pycuda')
    import pycuda.gpuarray as gpuarray
    import pycuda.autoinit  # noqa

    rotation_angle = sympy.pi / 5
    scale = 1

    # Integer images are scaled by 255 before conversion; float images are used as they are.
    host_image = lenna * 255 if dtype == np.int32 else lenna
    lenna_gpu = gpuarray.to_gpu(np.ascontiguousarray(host_image, dtype))

    x_f, y_f = pystencils.fields(f'x,y: {type_map[dtype]} [2d]', ghost_layers=0)
    rotation = sympy.rot_axis3(rotation_angle)[:2, :2]
    transformed = scale * rotation * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])

    interpolator = LinearInterpolator(x_f, address_mode=address_mode)
    assignments = pystencils.AssignmentCollection({
        y_f.center(): interpolator.at(transformed)
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU,
                                   use_textures_for_interpolation=use_textures)
    print(ast)
    print(pystencils.show_code(ast))
    compiled = ast.compile()

    out = gpuarray.zeros_like(lenna_gpu)
    compiled(x=lenna_gpu, y=out)
    title = f"out {address_mode} texture:{use_textures} {type_map[dtype]}"
    pyconrad.imshow(out, title)
@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'mirror'])
@pytest.mark.parametrize('dtype', [np.float64, np.float32, np.int32])
@pytest.mark.parametrize('use_textures', ('use_textures', False,))
def test_shift_interpolation_gpu(address_mode, dtype, use_textures):
    """Compile and run a linear interpolation with zero rotation and zero shift on the GPU.

    Skipped for 'mirror' on sympy 1.0/1.1 and whenever pycuda is unavailable.
    """
    sver = sympy.__version__.split(".")
    if (int(sver[0]) == 1 and int(sver[1]) < 2) and address_mode == 'mirror':
        pytest.skip("% printed as fmod on old sympy")
    pytest.importorskip('pycuda')

    import pycuda.gpuarray as gpuarray
    import pycuda.autoinit  # noqa

    rotation_angle = 0
    scale = 1
    shift = sympy.Matrix((0.0, 0.0))

    # Integer images are scaled by 255 before conversion; float images are used as they are.
    if dtype == np.int32:
        lenna_gpu = gpuarray.to_gpu(
            np.ascontiguousarray(lenna * 255, dtype))
    else:
        lenna_gpu = gpuarray.to_gpu(
            np.ascontiguousarray(lenna, dtype))

    x_f, y_f = pystencils.fields(f'x,y: {type_map[dtype]} [2d]', ghost_layers=0)

    # The former `if use_textures / else` had identical branches, so the
    # transform is the same for both settings.
    transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
    assignments = pystencils.AssignmentCollection({
        y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
    })

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU,
                                   use_textures_for_interpolation=use_textures)
    print(pystencils.show_code(ast))
    kernel = ast.compile()

    out = gpuarray.zeros_like(lenna_gpu)
    kernel(x=lenna_gpu, y=out)
    pyconrad.imshow(out,
                    f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
@pytest.mark.parametrize('address_mode', ['border', 'clamp'])
def test_rotate_interpolation_size_change(address_mode):
    """Run a rotated interpolation into a (100, 150) output instead of one shaped like the input.

    Only 'border' and 'clamp' are parametrized: 'wrap' and 'mirror' currently
    fail on new sympy due to conjugate().
    """
    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
    rotation_angle = sympy.pi / 5
    rotation = sympy.rot_axis3(rotation_angle)[:2, :2]
    transformed = rotation * sympy.Matrix((x_, y_))

    interpolator = LinearInterpolator(x_f, address_mode=address_mode)
    assignments = pystencils.AssignmentCollection({
        y_f.center(): interpolator.at(transformed)
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments)
    print(ast)
    print(pystencils.show_code(ast))
    compiled = ast.compile()

    out = np.zeros((100, 150), np.float64)
    compiled(x=lenna, y=out)
    pyconrad.imshow(out, "small out " + address_mode)
@pytest.mark.parametrize('address_mode, target',
                         itertools.product(['border', 'wrap', 'clamp', 'mirror'], [pystencils.Target.CPU]))
def test_field_interpolated(address_mode, target):
    """Compile and run a kernel built with ``Field.interpolated_access`` for each address mode."""
    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
    sample_position = [0.5 * x_ + 2.7, 0.25 * y_ + 7.2]
    assignments = pystencils.AssignmentCollection({
        y_f.center(): x_f.interpolated_access(sample_position, address_mode=address_mode)
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=target)
    print(ast)
    print(pystencils.show_code(ast))
    compiled = ast.compile()

    out = np.zeros_like(lenna)
    compiled(x=lenna, y=out)
    pyconrad.imshow(out, "out " + address_mode)
def test_spatial_derivative():
    """Differentiate an interpolated access with respect to one of its coordinate fields and print it."""
    x, y = pystencils.fields('x, y: float32[2d]')
    tx, ty = pystencils.fields('t_x, t_y: float32[2d]')

    interpolated = x.interpolated_access((tx.center, ty.center))
    diff = sympy.diff(interpolated, tx.center)
    print("diff: " + str(diff))
%% Cell type:code id: tags:
``` python
from pystencils.session import *
```
%% Cell type:code id: tags:
``` python
# Periodic 256x256 data handling with one array 'c', filled with 0.0 including ghost layers.
dh = ps.create_data_handling(domain_size=(256, 256), periodicity=True)
c_field = dh.add_array('c')
dh.fill("c", 0.0, ghost_layers=True)
```
%% Cell type:code id: tags:
``` python
# Set 'c' to 1.0 for raw array indices x in [0, 129) and y in [0, 258).
# NOTE(review): 258 presumably covers 256 cells plus one ghost layer per side — confirm.
for x in range(129):
    for y in range(258):
        dh.cpu_arrays['c'][x, y] = 1.0
```
%% Cell type:code id: tags:
``` python
plt.scalar_field(dh.cpu_arrays["c"])
```
%% Output
<matplotlib.image.AxesImage at 0x7fcb7d253710>
%% Cell type:code id: tags:
``` python
# Each cell takes the value of its neighbour at offset +1 along the first axis.
ur = ps.Assignment(c_field[0, 0], c_field[1, 0])
ast = ps.create_kernel(ur, target=dh.default_target, cpu_openmp=True)
kernel = ast.compile()
```
%% Cell type:code id: tags:
``` python
c_sync = dh.synchronization_function_cpu(['c'])
```
%% Cell type:code id: tags:
``` python
def timeloop(steps=10):
    """Synchronize and run the kernel ``steps`` times, then return the gathered array 'c'."""
    for _ in range(steps):
        c_sync()
        dh.run_kernel(kernel)
    result = dh.gather_array('c')
    return result
```
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('video')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode('image_update')
```
%% Cell type:code id: tags:
``` python
ani = ps.plot.scalar_field_animation(timeloop, rescale=True, frames=12)
ps.jupyter.display_animation(ani)
```
%% Output
%% Cell type:code id: tags:
``` python
def grid_update_function(image):
    """Run 40 synchronize-and-update steps and return the gathered array 'c'.

    The ``image`` argument is not used.
    """
    for _ in range(40):
        c_sync()
        dh.run_kernel(kernel)
    result = dh.gather_array('c')
    return result
```
%% Cell type:code id: tags:
``` python
animation = ps.jupyter.make_imshow_animation(dh.cpu_arrays["c"], grid_update_function, frames=300)
```
%% Output
%% Cell type:code id: tags:
``` python
ps.jupyter.set_display_mode("video")
ps.jupyter.set_display_mode("window")
ps.jupyter.set_display_mode("image_update")
ps.jupyter.activate_ipython()
```
import numpy as np
import pytest
import sympy as sp
from pathlib import Path
from kerncraft.kernel import KernelCode
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, ECMData, Benchmark
import pystencils as ps
from pystencils import Assignment, Field
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.cpu import create_kernel
from pystencils.datahandling import create_data_handling
from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
from pystencils.timeloop import TimeLoop
# Directory containing this test module.
SCRIPT_FOLDER = Path(__file__).parent
# Folder with the machine description (.yml) and reference kernel (.c) files read by the tests.
INPUT_FOLDER = SCRIPT_FOLDER / "kerncraft_inputs"
@pytest.mark.kerncraft
def test_compilation():
    """Generate code from a reference kerncraft kernel and a benchmark from a pystencils kernel."""
    machine = MachineModel(path_to_yaml=INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml")

    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
    reference_kernel = KernelCode(kernel_file_path.read_text(), machine=machine, filename=kernel_file_path)
    reference_kernel.get_kernel_header(name='test_kernel')
    reference_kernel.get_kernel_code(name='test_kernel')
    reference_kernel.get_main_code(kernel_function_name='test_kernel')

    arr = np.zeros([30, 50, 3])
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)

    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)

    ast = create_kernel([update_rule])
    benchmark_code = generate_benchmark(ast, likwid=False)
    print(benchmark_code)
def analysis(kernel, machine, model='ecmdata'):
    """Build the requested kerncraft performance model for ``kernel``, analyze it and return it.

    Args:
        kernel: kernel object handed to the model constructor.
        machine: machine model handed to the model constructor.
        model: 'ecmdata', 'ecm' or 'benchmark'; any other value selects ECM.

    Returns:
        The model instance after ``analyze()`` has been called on it.
    """
    if model == 'ecmdata':
        model_class = ECMData
    elif model == 'benchmark':
        model_class = Benchmark
    else:
        # Covers 'ecm' as well as every unrecognised name.
        model_class = ECM

    model = model_class(kernel, machine, KerncraftParameters())
    model.analyze()
    return model
@pytest.mark.kerncraft
def test_3d_7pt_osaca():
    """Compare the flop count of a pystencils 3D stencil with the reference C kernel.

    Also checks that the generated kernel code contains an OpenMP pragma and
    that the generated main code initialises the likwid marker API.
    """
    size = [20, 200, 200]
    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
    machine_model = MachineModel(path_to_yaml=INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml")

    reference_kernel = KernelCode(kernel_file_path.read_text(), machine=machine_model, filename=kernel_file_path)
    reference_kernel.set_constant('M', size[0])
    reference_kernel.set_constant('N', size[1])
    # The reference kernel only has the constants M and N, so the last two extents must match.
    assert size[1] == size[2]
    analysis(reference_kernel, machine_model, model='ecm')

    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)

    s = sp.Symbol("s")
    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
    update_rule = Assignment(b[0, 0, 0], s * rhs)

    k = PyStencilsKerncraftKernel(create_kernel([update_rule]), machine=machine_model, debug_print=True)
    analysis(k, machine_model, model='ecm')
    assert reference_kernel._flops == k._flops

    path, lock = k.get_kernel_code(openmp=True)
    with open(path) as generated:
        kernel_source = generated.read()
        assert "#pragma omp parallel" in kernel_source

    path, lock = k.get_main_code()
    with open(path) as generated:
        main_source = generated.read()
        assert "likwid_markerInit();" in main_source
@pytest.mark.kerncraft
def test_2d_5pt():
    """Check that the 2D stencil analysis yields the same cycle results as the reference C kernel."""
    machine = MachineModel(path_to_yaml=INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml")

    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
    reference_kernel = KernelCode(kernel_file_path.read_text(), machine=machine,
                                  filename=kernel_file_path)
    reference = analysis(reference_kernel, machine)

    arr = np.zeros([30, 50, 3])
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)

    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)

    k = PyStencilsKerncraftKernel(create_kernel([update_rule]), machine)
    result = analysis(k, machine)

    cycle_pairs = zip(reference.results['cycles'], result.results['cycles'])
    for expected_entry, actual_entry in cycle_pairs:
        assert expected_entry == actual_entry
@pytest.mark.kerncraft
def test_3d_7pt():
    """Check that the 3D stencil analysis yields the same cycle results as the reference C kernel."""
    machine = MachineModel(path_to_yaml=INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml")
    size = [30, 50, 50]

    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
    reference_kernel = KernelCode(kernel_file_path.read_text(), machine=machine,
                                  filename=kernel_file_path)
    reference_kernel.set_constant('M', size[0])
    reference_kernel.set_constant('N', size[1])
    # The reference kernel only has the constants M and N, so the last two extents must match.
    assert size[1] == size[2]
    reference = analysis(reference_kernel, machine)

    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)

    s = sp.Symbol("s")
    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
    update_rule = Assignment(b[0, 0, 0], s * rhs)

    k = PyStencilsKerncraftKernel(create_kernel([update_rule]), machine)
    result = analysis(k, machine)

    cycle_pairs = zip(reference.results['cycles'], result.results['cycles'])
    for expected_entry, actual_entry in cycle_pairs:
        assert expected_entry == actual_entry
@pytest.mark.kerncraft
def test_benchmark():
    """Compare the timing of the generated C benchmark with a Python ``TimeLoop`` benchmark."""
    size = [30, 50, 50]
    template = np.zeros(size)
    a = Field.create_from_numpy_array('a', template, index_dimensions=0)
    b = Field.create_from_numpy_array('b', template, index_dimensions=0)

    s = sp.Symbol("s")
    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
    update_rule = Assignment(b[0, 0, 0], s * rhs)
    ast = create_kernel([update_rule])

    c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)

    kernel = ast.compile()
    # Arrays handed to the compiled kernel, all filled with 0.23.
    a_values = np.full(size, fill_value=0.23)
    b_values = np.full(size, fill_value=0.23)

    timeloop = TimeLoop(steps=1)
    timeloop.add_call(kernel, {'a': a_values, 'b': b_values, 's': 0.23})
    timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)

    np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)
@pytest.mark.kerncraft
def test_benchmark_vectorized():
    """Run the C benchmark of a vectorized 3D stencil for every supported instruction set."""
    instruction_sets = get_supported_instruction_sets()
    if not instruction_sets:
        pytest.skip("cannot detect CPU instruction set")

    for vec in instruction_sets:
        dh = create_data_handling((20, 20, 20), periodicity=True)

        # Array alignment is derived from the vector width reported for this instruction set.
        vector_info = get_vector_instruction_set(instruction_set=vec)
        width = vector_info['width'] * 8

        a = dh.add_array("a", values_per_cell=1, alignment=width)
        b = dh.add_array("b", values_per_cell=1, alignment=width)

        rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
        update_rule = Assignment(b[0, 0, 0], rhs)

        opt = {
            'instruction_set': vec,
            'assume_aligned': True,
            'nontemporal': True,
            'assume_inner_stride_one': True,
        }
        ast = ps.create_kernel(update_rule, cpu_vectorize_info=opt)
        run_c_benchmark(ast, 5)
from collections import defaultdict
import numpy as np
import pytest
from sympy.abc import x, y
from pystencils import Assignment, create_kernel, fields, CreateKernelConfig
from pystencils.transformations import adjust_c_single_precision_type
@pytest.mark.parametrize("data_type", ("float", "double"))
def test_single_precision(data_type):
    """Check that ``data_type`` determines the dtype of the typed assignments in the kernel AST."""
    bits = 64 if data_type == 'double' else 32
    dtype = f"float{bits}"
    expected = np.dtype(dtype)

    s = fields(f"s: {dtype}[1D]")
    assignments = [Assignment(x, y), Assignment(s[0], x)]
    ast = create_kernel(assignments, config=CreateKernelConfig(data_type=data_type))

    first_node = ast.body.args[0]
    assert first_node.lhs.dtype.numpy_dtype == expected
    assert first_node.rhs.dtype.numpy_dtype == expected
    assert ast.body.args[1].body.args[0].rhs.dtype.numpy_dtype == expected
def test_adjustment_dict():
    """After adjustment, the entries 'float' and 'double' must resolve to float32 and float64."""
    type_names = {"x": "float", "y": "double"}
    adjust_c_single_precision_type(type_names)

    for key, expected in (("x", "float32"), ("y", "float64")):
        assert np.dtype(type_names[key]) == np.dtype(expected)
def test_adjustement_default_dict():
    """Entries of a ``defaultdict`` defaulting to 'float' must resolve to float32 after adjustment.

    This is checked for a key created before the adjustment, one created after
    it, and one first accessed inside the assertion itself.
    """
    type_names = defaultdict(lambda: "float")
    type_names["x"]  # bare lookup: creates the entry before the adjustment
    adjust_c_single_precision_type(type_names)
    type_names["y"]  # bare lookup: creates the entry after the adjustment

    single_precision = np.dtype("float32")
    for key in ("x", "y", "z"):
        assert np.dtype(type_names[key]) == single_precision
import pytest
# Imports for the LLVM backend tests. If any of them raises ModuleNotFoundError,
# pytest.importorskip("llvmlite") is called instead.
# NOTE(review): if llvmlite itself is importable and a different module was missing,
# importorskip does not skip and the names below stay undefined — confirm this is intended.
try:
    from pystencils.llvm.llvmjit import generate_and_jit
    from pystencils.llvm import create_kernel, make_python_function
    from pystencils.cpu.cpujit import get_llc_command
    from pystencils import Assignment, Field, Target
    import numpy as np
    import sympy as sp
except ModuleNotFoundError:
    pytest.importorskip("llvmlite")
def test_jacobi_fixed_field_size():
    """Compare the LLVM-jitted Jacobi kernel on fixed-size fields with a NumPy reference."""
    size = (30, 20)
    src_field_llvm = np.random.rand(*size)
    src_field_py = np.copy(src_field_llvm)
    dst_field_llvm = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_llvm)
    d = Field.create_from_numpy_array("d", dst_field_llvm)
    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
    ast = create_kernel([jacobi])

    # Reference on the interior cells; the four neighbours are summed in the order
    # x-1, x+1, y-1, y+1.
    dst_field_py[1:-1, 1:-1] = 0.25 * (src_field_py[:-2, 1:-1] + src_field_py[2:, 1:-1] +
                                       src_field_py[1:-1, :-2] + src_field_py[1:-1, 2:])

    jit = generate_and_jit(ast)
    jit('kernel', dst_field_llvm, src_field_llvm)

    deviation = np.sum(np.abs(dst_field_py - dst_field_llvm))
    np.testing.assert_almost_equal(deviation, 0.0)
@pytest.mark.skipif(not get_llc_command(), reason="Tests requires llc in $PATH")
def test_jacobi_fixed_field_size_gpu():
    """Compare the LLVM-jitted GPU Jacobi kernel with a NumPy reference; skipped without pycuda."""
    pytest.importorskip("pycuda")
    size = (30, 20)

    import pycuda.autoinit  # noqa
    from pycuda.gpuarray import to_gpu

    src_field_llvm = np.random.rand(*size)
    src_field_py = np.copy(src_field_llvm)
    dst_field_llvm = np.zeros(size)
    dst_field_py = np.zeros(size)

    # The fields are described by the host arrays; the kernel itself runs on device copies.
    f = Field.create_from_numpy_array("f", src_field_py)
    d = Field.create_from_numpy_array("d", dst_field_py)
    src_field_llvm = to_gpu(src_field_llvm)
    dst_field_llvm = to_gpu(dst_field_llvm)

    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
    ast = create_kernel([jacobi], target=Target.GPU)

    # Reference on the interior cells; the four neighbours are summed in the order
    # x-1, x+1, y-1, y+1.
    dst_field_py[1:-1, 1:-1] = 0.25 * (src_field_py[:-2, 1:-1] + src_field_py[2:, 1:-1] +
                                       src_field_py[1:-1, :-2] + src_field_py[1:-1, 2:])

    jit = generate_and_jit(ast)
    jit('kernel', dst_field_llvm, src_field_llvm)

    deviation = np.sum(np.abs(dst_field_py - dst_field_llvm.get()))
    np.testing.assert_almost_equal(deviation, 0.0)
def test_jacobi_variable_field_size():
    """Compare the LLVM kernel for generic 3D fields with a NumPy reference on a 3x3x3 array."""
    size = (3, 3, 3)
    f = Field.create_generic("f", 3)
    d = Field.create_generic("d", 3)
    jacobi = Assignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
    ast = create_kernel([jacobi])

    src_field_llvm = np.random.rand(*size)
    src_field_py = np.copy(src_field_llvm)
    dst_field_llvm = np.zeros(size)
    dst_field_py = np.zeros(size)

    # Reference on the interior cells; only the x and y neighbours take part, summed in the
    # order x-1, x+1, y-1, y+1.
    inner = slice(1, -1)
    dst_field_py[inner, inner, inner] = 0.25 * (
        src_field_py[:-2, inner, inner] + src_field_py[2:, inner, inner] +
        src_field_py[inner, :-2, inner] + src_field_py[inner, 2:, inner])

    kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
    kernel()

    deviation = np.sum(np.abs(dst_field_py - dst_field_llvm))
    np.testing.assert_almost_equal(deviation, 0.0)
def test_pow_llvm():
    """Check ``sp.Pow`` in the LLVM backend for several exponents applied to a field of fours."""
    size = (30, 20)
    src_field_llvm = 4 * np.ones(size)
    dst_field_llvm = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_llvm)
    d = Field.create_from_numpy_array("d", dst_field_llvm)

    # (exponent, expected value of 4 ** exponent)
    cases = (
        (-1.0, 0.25),
        (0.5, 2.0),
        (2.0, 16.0),
        (3.0, 64.0),
        (4.0, 256.0),
    )
    for exponent, expected in cases:
        ur = Assignment(d[0, 0], sp.Pow(f[0, 0], exponent))
        jit = generate_and_jit(create_kernel([ur]))
        jit('kernel', dst_field_llvm, src_field_llvm)
        assert np.all(expected == dst_field_llvm)
def test_piecewise_llvm():
    """Exercise each relational operator as a ``sp.Piecewise`` condition in the LLVM backend.

    The source holds 10.0 in rows 0..14 and 0.0 elsewhere.  Every kernel writes
    1.0 where its condition holds and 0.0 otherwise; one region of the result
    is checked per case.
    """
    size = (30, 20)

    def fresh_arrays():
        # Source with 10.0 in the first 15 rows and an all-zero destination.
        source = np.zeros(size)
        target = np.zeros(size)
        source[0:15, :] = 10.0
        return source, target

    def run_case(condition, source, target):
        # Build, jit and execute the kernel writing the indicator of `condition`.
        rule = Assignment(d[0, 0], sp.Piecewise((1.0, condition), (0.0, True)))
        jit = generate_and_jit(create_kernel([rule]))
        jit('kernel', target, source)

    src_field_llvm, dst_field_llvm = fresh_arrays()
    f = Field.create_from_numpy_array("f", src_field_llvm)
    d = Field.create_from_numpy_array("d", dst_field_llvm)

    # f > 10 holds nowhere (the maximum is exactly 10.0), so everything stays 0.0.
    # This first case runs on the very arrays the fields were created from.
    run_case(f[0, 0] > 10, src_field_llvm, dst_field_llvm)
    assert np.all(dst_field_llvm[:, :] == 0.0)

    # (condition, region of the destination expected to be 1.0)
    remaining_cases = (
        (f[0, 0] >= 10, np.s_[0:15, :]),
        (f[0, 0] < 5, np.s_[15:, :]),
        (f[0, 0] <= 10, np.s_[:, :]),
        (sp.Equality(f[0, 0], 10.0), np.s_[0:15, :]),
        (sp.Unequality(f[0, 0], 10.0), np.s_[15:, :]),
    )
    for condition, region in remaining_cases:
        src_field_llvm, dst_field_llvm = fresh_arrays()
        run_case(condition, src_field_llvm, dst_field_llvm)
        assert np.all(dst_field_llvm[region] == 1.0)
def test_piecewise_or_llvm():
    """Check an ``sp.Or`` condition inside ``sp.Piecewise`` in the LLVM backend.

    Rows 0..14 hold 10.5, which is neither > 11 nor < 10, so they must stay 0.0.
    """
    size = (30, 20)
    src_field_llvm = np.zeros(size)
    dst_field_llvm = np.zeros(size)
    src_field_llvm[0:15, :] = 10.5

    f = Field.create_from_numpy_array("f", src_field_llvm)
    d = Field.create_from_numpy_array("d", dst_field_llvm)

    outside_band = sp.Or(f[0, 0] > 11, f[0, 0] < 10)
    rule = Assignment(d[0, 0], sp.Piecewise((1.0, outside_band), (0.0, True)))

    jit = generate_and_jit(create_kernel([rule]))
    jit('kernel', dst_field_llvm, src_field_llvm)

    assert np.all(dst_field_llvm[0:15, :] == 0.0)
def test_print_function_llvm():
    """Compile a kernel that applies ``sin`` and check that sin(0) yields 0 everywhere."""
    shape = (30, 20)
    source = np.zeros(shape)
    target = np.zeros(shape)
    source[0:15, :] = 0.0

    f = Field.create_from_numpy_array("f", source)
    d = Field.create_from_numpy_array("d", target)

    sine_update = Assignment(d[0, 0], sp.sin(f[0, 0]))
    jit = generate_and_jit(create_kernel([sine_update]))
    jit('kernel', target, source)

    assert np.all(target[:, :] == 0.0)
# When this module is executed as a script, run a single test directly.
if __name__ == "__main__":
    test_jacobi_fixed_field_size_gpu()
import numpy as np
import pytest
import sympy as sp
import pystencils
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function
# Probe for OpenCL support; the tests below are skipped when HAS_OPENCL is False.
try:
    import pyopencl as cl
    HAS_OPENCL = True
    import pystencils.opencl.autoinit
except Exception:
    # Any failure of either import above marks OpenCL as unavailable.
    HAS_OPENCL = False
def test_print_opencl():
    """Print one GPU kernel via the CUDA and OpenCL backends and inspect the OpenCL text."""
    z, y, x = pystencils.fields("z, y, x: [2d]")
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU)
    print(ast)
    pystencils.show_code(ast, custom_backend=CudaBackend())

    opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    opencl_text = str(opencl_code)
    expected_fragments = (
        "__global double * RESTRICT const _data_x",
        "__global double * RESTRICT",
        "get_local_id(0)",
    )
    for fragment in expected_fragments:
        assert fragment in opencl_text
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
    """Run a fixed-size kernel through CUDA and OpenCL and compare both results."""
    pytest.importorskip('pycuda')

    z, y, x = pystencils.fields("z, y, x: [20,30]")
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU)  # TODO maybe Target Opencl
    print(ast)
    print(pystencils.show_code(ast, custom_backend=CudaBackend()))
    print(pystencils.show_code(ast, custom_backend=OpenClBackend()))

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    # Same host data is uploaded to both runtimes so the outputs are comparable.
    host = {name: np.random.rand(20, 30) for name in ("x", "y", "z")}

    cuda_args = {name: gpuarray.to_gpu(data) for name, data in host.items()}
    cuda_kernel(**cuda_args)
    result_cuda = cuda_args["z"].get()

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)
    cl_args = {name: array.to_device(queue, data) for name, data in host.items()}

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(**cl_args)
    result_opencl = cl_args["z"].get(queue)

    assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
    """Run a generic 2D kernel through CUDA and OpenCL and compare both results."""
    pytest.importorskip('pycuda')

    z, y, x = pystencils.fields("z, y, x: [2d]")
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU)
    print(ast)
    for backend in (CudaBackend(), OpenClBackend()):
        pystencils.show_code(ast, custom_backend=backend)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    # Same host data is uploaded to both runtimes so the outputs are comparable.
    host = {name: np.random.rand(20, 30) for name in ("x", "y", "z")}

    cuda_args = {name: gpuarray.to_gpu(data) for name, data in host.items()}
    cuda_kernel(**cuda_args)
    result_cuda = cuda_args["z"].get()

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)
    cl_args = {name: array.to_device(queue, data) for name, data in host.items()}

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(**cl_args)
    result_opencl = cl_args["z"].get(queue)

    assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
    """Compare CUDA and OpenCL results for a kernel taking a scalar parameter ``a``."""
    pytest.importorskip('pycuda')

    z, y, x = pystencils.fields("z, y, x: [2d]")
    a = sp.Symbol('a')
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU)
    print(ast)
    print(pystencils.show_code(ast, custom_backend=CudaBackend()))
    print(pystencils.show_code(ast, custom_backend=OpenClBackend()))

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    # Same host data is uploaded to both runtimes so the outputs are comparable.
    host = {name: np.random.rand(20, 30) for name in ("x", "y", "z")}

    cuda_args = {name: gpuarray.to_gpu(data) for name, data in host.items()}
    cuda_kernel(a=5., **cuda_args)
    result_cuda = cuda_args["z"].get()

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)
    cl_args = {name: array.to_device(queue, data) for name, data in host.items()}

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(a=5., **cl_args)
    result_opencl = cl_args["z"].get(queue)

    assert np.allclose(result_cuda, result_opencl)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_without_cuda():
    """Build and launch the OpenCL kernel without going through pycuda."""
    z, y, x = pystencils.fields("z, y, x: [20,30]")
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    ast = pystencils.create_kernel(assignments, target=pystencils.Target.GPU)
    print(ast)
    print(pystencils.show_code(ast, custom_backend=OpenClBackend()))

    host = {name: np.random.rand(20, 30) for name in ("x", "y", "z")}

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)
    cl_args = {name: array.to_device(queue, data) for name, data in host.items()}

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(**cl_args)
@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_kernel_creation():
    """Create, inspect and launch a kernel built for the OpenCL target."""
    # The function-level import below binds `pystencils`; keep it the module global.
    global pystencils

    z, y, x = pystencils.fields("z, y, x: [20,30]")
    update = {z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])}
    assignments = pystencils.AssignmentCollection(update)
    print(assignments)

    import pystencils.opencl.autoinit
    ast = pystencils.create_kernel(assignments, target=pystencils.Target.OPENCL)
    print(ast.backend)

    code = pystencils.get_code_str(ast)
    print(code)
    assert 'get_local_size' in code

    opencl_kernel = ast.compile()

    host = {name: np.random.rand(20, 30) for name in ("x", "y", "z")}

    import pyopencl.array as array
    assert get_global_cl_queue()
    cl_args = {name: array.to_device(get_global_cl_queue(), data)
               for name, data in host.items()}

    assert opencl_kernel is not None
    opencl_kernel(**cl_args)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('pycuda')
```
%% Cell type:code id: tags:
``` python
from pystencils.session import *
sp.init_printing()
frac = sp.Rational
```
%% Cell type:markdown id: tags:
# Phase-field simulation of dendritic solidification in 3D
This notebook tests the model presented in the dendritic growth tutorial in 3D.
%% Cell type:code id: tags:
``` python
# Target used for the data handling and for kernel creation further down.
target = ps.Target.GPU
gpu = target == ps.Target.GPU
# Small 25^3 domain when `is_test_run` is defined in the globals, 300^3 otherwise.
domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
# Periodic data handling holding the three arrays 'phi', 'phidelta' and 'T'.
dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
φ_field = dh.add_array('phi', latex_name='φ')
φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
t_field = dh.add_array('T')
```
%% Cell type:code id: tags:
``` python
# Symbols of the model; numeric values are supplied by the `parameters` dict later on.
ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
εb = sp.Symbol("\\bar{\\epsilon}")
# Discretization object applied to the continuous expressions below (dx=0.03, dt=1e-5).
discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
# Shorthands: centre accesses of the phase and temperature fields, and the Diff operator.
φ = φ_field.center
T = t_field.center
d = ps.fd.Diff
def f(φ, m):
    # Quartic polynomial in φ whose cubic and quadratic coefficients depend on m.
    return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
bulk_free_energy_density = f(φ, m)
# ε²/2 times the sum of the squared derivatives of φ along the three axes.
interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
```
%% Cell type:markdown id: tags:
Here comes the major change that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which cannot be computed simply with atan() as in the 2D case.
%% Cell type:code id: tags:
``` python
# Vector of the three first derivatives of φ.
n = sp.Matrix([d(φ, i) for i in range(3)])
# Its Euclidean length, used to normalise n to unit length.
nLen = sp.sqrt(sum(n_i**2 for n_i in n))
n = n / nLen
# Sum of the fourth powers of the normalised components.
nVal = sum(n_i**4 for n_i in n)
σ = δ * nVal
# Direction-dependent ε: base value εb scaled by (1 + σ).
εVal = εb * (1 + σ)
εVal
```
%% Output
$\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
⎛ ⎛ 4
⎜ ⎜ D(φ[0,0,0])
\bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
⎜ ⎜ 2
⎜ ⎜⎛ 2 2 2⎞ ⎛
⎝ ⎝⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]
4 4
D(φ[0,0,0]) D(φ[0,0,0])
────────────────────────────────── + ─────────────────────────────────────────
2
2 2 2⎞ ⎛ 2 2
) + D(φ[0,0,0]) + D(φ[0,0,0]) ⎠ ⎝D(φ[0,0,0]) + D(φ[0,0,0]) + D(φ[0,0,0]
⎞ ⎞
⎟ ⎟
────⎟ + 1⎟
2⎟ ⎟
2⎞ ⎟ ⎟
) ⎠ ⎠ ⎠
%% Cell type:code id: tags:
``` python
def m_func(temperature):
    """Return (α / π) · atan(γ · (T_eq − temperature)) as a symbolic expression."""
    prefactor = α / sp.pi
    return prefactor * sp.atan(γ * (Teq - temperature))
```
%% Cell type:code id: tags:
``` python
# Replace m by its temperature-dependent form and ε by the direction-dependent εVal.
substitutions = {m: m_func(T),
ε: εVal}
fe_i = interface_free_energy_density.subs(substitutions)
fe_b = bulk_free_energy_density.subs(substitutions)
# Functional derivatives of both energy parts with respect to φ, with Diff terms expanded.
μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
```
%% Cell type:code id: tags:
``` python
# The interface term is only used where nLen**2 exceeds 1e-10 and is 0 elsewhere
# (presumably to avoid dividing by a vanishing gradient length — confirm).
dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
```
%% Cell type:code id: tags:
``` python
# Numeric values substituted for the model symbols before discretization.
parameters = {
τ: 0.0003,
κ: 1.8,
εb: 0.01,
δ: 0.3,
γ: 10,
j: 6,
α: 0.9,
Teq: 1.0,
θzero: 0.2,
# Substitute the symbolic π by its floating point value as well.
sp.pi: sp.pi.evalf()
}
parameters
```
%% Output
$\displaystyle \left\{ \pi : 3.14159265358979, \ T_{eq} : 1.0, \ \bar{\epsilon} : 0.01, \ j : 6, \ α : 0.9, \ γ : 10, \ δ : 0.3, \ θ_{0} : 0.2, \ κ : 1.8, \ τ : 0.0003\right\}$
{π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
%% Cell type:code id: tags:
``` python
# Right-hand side of the φ evolution, written into the 'phidelta' array.
dφ_dt = - dF_dφ / τ
assignments = [
ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
]
# Common subexpression elimination on the φ-delta assignment.
φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
# Second assignment: update of φ itself from the stored delta.
φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
# Temperature equation: transient term, diffusion with coefficient 1, and a κ-weighted φ-delta source.
temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
temperatureEqs = [
ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
]
```
%% Cell type:code id: tags:
``` python
temperatureEqs
```
%% Output
$\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
[T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
_B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
%% Cell type:code id: tags:
``` python
# Compile one kernel per equation set for the selected target.
φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
```
%% Cell type:code id: tags:
``` python
def time_loop(steps):
    """Run ``steps`` iterations of the φ and temperature kernels.

    All arrays are transferred with ``dh.all_to_gpu()`` before the loop and
    with ``dh.all_to_cpu()`` after it. In every iteration each kernel is
    preceded by the synchronization function of the array it reads.
    """
    sync_phi = dh.synchronization_function(['phi'], target=target)
    sync_temperature = dh.synchronization_function(['T'], target=target)

    dh.all_to_gpu()
    for _ in range(steps):
        sync_phi()
        dh.run_kernel(φ_kernel)
        sync_temperature()
        dh.run_kernel(temperatureKernel)
    dh.all_to_cpu()
def init(nucleus_size=np.sqrt(5)):
    """Reset the fields: φ is 1 inside a centred sphere of radius ``nucleus_size``, T is 0."""
    for block in dh.iterate():
        x, y, z = block.cell_index_arrays
        # Shift the cell indices so that the origin lies at the block centre.
        x = x - block.shape[0] // 2
        y = y - block.shape[1] // 2
        z = z - block.shape[2] // 2
        inside_nucleus = (x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2

        block['phi'].fill(0)
        block['phi'][inside_nucleus] = 1.0
        block['T'].fill(0.0)
def plot(slice_obj=ps.make_slice[:, :, 0.5]):
    """Show the 'phi', 'T' and 'phidelta' arrays side by side for the given slice."""
    panels = (("φ", 'phi'), ("T", 'T'), ("∂φ", 'phidelta'))
    for column, (title, array_name) in enumerate(panels, start=1):
        plt.subplot(1, 3, column)
        plt.scalar_field(dh.gather_array(array_name, slice_obj).squeeze())
        plt.title(title)
        plt.colorbar()
```
%% Cell type:code id: tags:
``` python
# Set the initial condition, plot it and print the data handling summary.
init()
plot()
print(dh)
```
%% Output
Name| Inner (min/max)| WithGl (min/max)
----------------------------------------------------
T| ( 0, 0)| ( 0, 0)
phi| ( 0, 1)| ( 0, 1)
phidelta| ( 0, 0)| ( 0, 0)
%% Cell type:code id: tags:
``` python
if 'is_test_run' in globals():
    # Test mode: two time steps, then check that all three arrays hold finite maxima.
    time_loop(2)
    assert np.isfinite(dh.max('phi'))
    assert np.isfinite(dh.max('T'))
    assert np.isfinite(dh.max('phidelta'))
else:
    # Full run: 300 batches of 100 steps, one VTK output of 'phi' per batch.
    from time import perf_counter
    vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
    last = perf_counter()
    for i in range(300):
        time_loop(100)
        vtk_writer(i)
        # Report the batch index, elapsed time since the previous report, and max of 'phi'.
        print("Step ", i, perf_counter() - last, dh.max('phi'))
        last = perf_counter()
```
import pytest
import pystencils
from sympy import oo
@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
@pytest.mark.parametrize('negative', (False, 'Negative'))
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_print_infinity(type, negative, target):
    """Assign +/- infinity to a field and make sure the kernel compiles and prints."""
    x = pystencils.fields(f'x: {type}[1d]')

    infinity = -oo if negative else oo
    assignment = pystencils.Assignment(x.center, infinity)
    ast = pystencils.create_kernel(assignment, data_type=type, target=target)

    # Compiling for the GPU target needs pycuda; skip when it is not installed.
    if target == pystencils.Target.GPU:
        pytest.importorskip('pycuda')

    ast.compile()
    print(ast.compile().code)
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import pytest
import pystencils
from pystencils.backends.cbackend import CBackend
class UnsupportedNode(pystencils.astnodes.Node):
    """Bare ``Node`` subclass that adds nothing beyond the base initialiser."""

    def __init__(self):
        super().__init__()
def test_print_unsupported_node():
    """The C backend must raise NotImplementedError for a node type it does not know."""
    expected_message = 'CBackend does not support node of type UnsupportedNode'
    with pytest.raises(NotImplementedError, match=expected_message):
        CBackend()(UnsupportedNode())
import numpy as np
import pytest
from pystencils import Assignment, Field
try:
    from pystencils.llvm import create_kernel, make_python_function
except ModuleNotFoundError:
    # Skip this module when llvmlite cannot be imported
    # (presumably the reason the import above failed — confirm).
    pytest.importorskip("llvmlite")
def test_size_check():
    """Kernel created for two fixed-size fields of equal size, called with arrays of a different size."""
    src = np.zeros((20, 21, 9))
    dst = np.zeros_like(src)

    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
    update_rule = Assignment(sym_dst(0),
                             sym_src[-1, 1](1) + sym_src[1, -1](2))
    ast = create_kernel([update_rule])
    func = make_python_function(ast)

    # Call with arrays whose shape differs from the one the kernel was created for.
    new_shape = [a - 7 for a in src.shape]
    src = np.zeros(new_shape)
    dst = np.zeros(new_shape)

    # The call must be rejected with a ValueError; not raising fails the test.
    with pytest.raises(ValueError):
        func(src=src, dst=dst)
def test_fixed_size_mismatch_check():
    """Creating a kernel from two fixed-size fields of different size must fail."""
    src = np.zeros((20, 21, 9))
    dst = np.zeros((21, 21, 9))

    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
    update_rule = Assignment(sym_dst(0),
                             sym_src[-1, 1](1) + sym_src[1, -1](2))

    # The mismatch is expected to be detected at kernel creation time.
    with pytest.raises(ValueError):
        create_kernel([update_rule])
def test_fixed_and_variable_field_check():
    """Creating a kernel that mixes a fixed-size and a variable-size field must fail."""
    src = np.zeros((20, 21, 9))

    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
    update_rule = Assignment(sym_dst(0),
                             sym_src[-1, 1](1) + sym_src[1, -1](2))

    # The mix is expected to be rejected at kernel creation time.
    with pytest.raises(ValueError):
        create_kernel([update_rule])
def test_two_variable_shaped_fields():
    """Kernel with two variable-size fields must reject arrays of different shapes at call time."""
    src = np.zeros((20, 21, 9))
    dst = np.zeros((22, 21, 9))

    sym_src = Field.create_generic("src", spatial_dimensions=2, index_dimensions=1)
    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
    update_rule = Assignment(sym_dst(0),
                             sym_src[-1, 1](1) + sym_src[1, -1](2))

    ast = create_kernel([update_rule])
    func = make_python_function(ast)

    # Kernel creation succeeds; the shape mismatch must surface when calling.
    with pytest.raises(ValueError):
        func(src=src, dst=dst)
import numpy as np
import sympy as sp
from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
from pystencils.simp import sympy_cse_on_assignment_list
def test_sliced_iteration():
    """Run a stencil over an iteration slice whose upper bound is a runtime symbol."""
    size = (4, 4)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)

    src_field = Field.create_from_numpy_array('src', src_arr)
    dst_field = Field.create_from_numpy_array('dst', dst_arr)

    a, b = sp.symbols("a b")
    weighted_neighbours = (a * src_field[0, 1] + a * src_field[0, -1] +
                           b * src_field[1, 0] + b * src_field[-1, 0])
    update_rule = Assignment(dst_field[0, 0], weighted_neighbours / 4)

    # The slice end is a kernel parameter supplied at call time.
    x_end = TypedSymbol("x_end", "int")
    iteration_slice = make_slice[1:x_end, 1]
    x_end_value = size[1] - 1

    optimised = sympy_cse_on_assignment_list([update_rule])
    kernel = create_kernel(optimised, iteration_slice=iteration_slice).compile()
    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)

    expected_result = np.zeros(size)
    expected_result[1:x_end_value, 1] = 1
    np.testing.assert_almost_equal(expected_result, dst_arr)
def test_sliced_iteration_llvm():
    """LLVM variant of the sliced-iteration test; skipped when llvmlite is missing."""
    import pytest
    pytest.importorskip("llvmlite")

    size = (4, 4)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)

    src_field = Field.create_from_numpy_array('src', src_arr)
    dst_field = Field.create_from_numpy_array('dst', dst_arr)

    a, b = sp.symbols("a b")
    weighted_neighbours = (a * src_field[0, 1] + a * src_field[0, -1] +
                           b * src_field[1, 0] + b * src_field[-1, 0])
    update_rule = Assignment(dst_field[0, 0], weighted_neighbours / 4)

    # The slice end is a kernel parameter supplied at call time.
    x_end = TypedSymbol("x_end", "int")
    iteration_slice = make_slice[1:x_end, 1]
    x_end_value = size[1] - 1

    import pystencils.llvm as llvm_generator
    optimised = sympy_cse_on_assignment_list([update_rule])
    ast = llvm_generator.create_kernel(optimised, iteration_slice=iteration_slice)
    kernel = llvm_generator.make_python_function(ast)
    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)

    expected_result = np.zeros(size)
    expected_result[1:x_end_value, 1] = 1
    np.testing.assert_almost_equal(expected_result, dst_arr)
%% Cell type:code id: tags:
``` python
import pytest
pytest.importorskip('waLBerla')
```
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from time import perf_counter
from statistics import median
from functools import partial
```
%% Cell type:markdown id: tags:
## Benchmark for Python call overhead
%% Cell type:code id: tags:
``` python
# Kernel calls per timing measurement.
inner_repeats = 100
# Measurements taken per (size, benchmark) combination.
outer_repeats = 5
# Edge lengths of the benchmarked square domains: 2, 4, ..., 128.
sizes = [2**i for i in range(1, 8)]
sizes
```
%% Output
$\displaystyle \left[ 2, \ 4, \ 8, \ 16, \ 32, \ 64, \ 128\right]$
[2, 4, 8, 16, 32, 64, 128]
%% Cell type:code id: tags:
``` python
def benchmark_pure(domain_size, extract_first=False):
    """Return the mean time per call of a copy kernel invoked directly on numpy arrays.

    With ``extract_first`` the ``kernel`` attribute of the compiled object is
    called instead of the compiled object itself.
    """
    src = np.zeros(domain_size)
    dst = np.zeros_like(src)
    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()

    # The attribute lookup happens before the clock starts.
    if extract_first:
        kernel = kernel.kernel

    start = perf_counter()
    for _ in range(inner_repeats):
        kernel(src=src, dst=dst)
        src, dst = dst, src
    end = perf_counter()

    return (end - start) / inner_repeats
def benchmark_datahandling(domain_size, parallel=False):
    """Return the mean time per step of a copy kernel run through a data handling."""
    dh = ps.create_data_handling(domain_size, parallel=parallel)
    f_src = dh.add_array('src')
    f_dst = dh.add_array('dst')
    copy_rule = ps.Assignment(f_dst.center, f_src.center)
    kernel = ps.create_kernel(copy_rule).compile()

    start = perf_counter()
    for _ in range(inner_repeats):
        dh.run_kernel(kernel)
        dh.swap('src', 'dst')
    elapsed = perf_counter() - start

    return elapsed / inner_repeats
# Benchmark variants by name; each callable takes only the domain size.
name_to_func = {
'pure_extract': partial(benchmark_pure, extract_first=True),
'pure_no_extract': partial(benchmark_pure, extract_first=False),
'dh_serial': partial(benchmark_datahandling, parallel=False),
'dh_parallel': partial(benchmark_datahandling, parallel=True),
}
```
%% Cell type:code id: tags:
``` python
# Column-wise result table: one entry per individual measurement.
result = {'block_size': [],
'name': [],
'time': []}
for bs in sizes:
    print("Computing size ", bs)
    for name, func in name_to_func.items():
        # Repeat every variant `outer_repeats` times on a square (bs, bs) domain.
        for i in range(outer_repeats):
            time = func((bs, bs))
            result['block_size'].append(bs)
            result['name'].append(name)
            result['time'].append(time)
```
%% Output
Computing size 2
Computing size 4
Computing size 8
Computing size 16
Computing size 32
Computing size 64
Computing size 128
%% Cell type:code id: tags:
``` python
# Plotting only happens when `is_test_run` is not defined in the globals.
if 'is_test_run' not in globals():
    import pandas as pd
    import seaborn as sns
    data = pd.DataFrame.from_dict(result)
    # Left panel: bar plot with a logarithmic y axis.
    plt.subplot(1,2,1)
    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
    plt.yscale('log')
    # Right panel: same bar plot without changing the y scale.
    plt.subplot(1,2,2)
    data = pd.DataFrame.from_dict(result)
    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
```
%% Output
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import numpy as np
import sympy
from sympy.abc import k
import pystencils
from pystencils.data_types import create_type
def test_sum():
    """A sympy ``Sum`` is emitted with a ``double`` accumulator and evaluates correctly."""
    series = sympy.Sum(k, (k, 1, 100))
    expected_value = series.doit()
    print(series)
    print(expected_value)

    x = pystencils.fields('x: float32[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): series})

    ast = pystencils.create_kernel(assignments)
    code = str(pystencils.get_code_obj(ast))
    kernel = ast.compile()
    print(code)
    assert 'double sum' in code

    array = np.zeros((10,), np.float32)
    kernel(x=array)
    assert np.allclose(array, int(expected_value) * np.ones_like(array))
def test_sum_use_float():
    """With ``data_type`` float32 the ``Sum`` accumulator is emitted as ``float``."""
    series = sympy.Sum(k, (k, 1, 100))
    expected_value = series.doit()
    print(series)
    print(expected_value)

    x = pystencils.fields('x: float32[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): series})

    ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
    code = str(pystencils.get_code_obj(ast))
    kernel = ast.compile()
    print(code)
    print(pystencils.get_code_obj(ast))
    assert 'float sum' in code

    array = np.zeros((10,), np.float32)
    kernel(x=array)
    assert np.allclose(array, int(expected_value) * np.ones_like(array))
def test_product():
    """A sympy ``Product`` over an int64 symbol is emitted with an ``int64_t`` accumulator."""
    k = pystencils.TypedSymbol('k', create_type('int64'))
    product = sympy.Product(k, (k, 1, 10))
    expected_value = product.doit()
    print(product)
    print(expected_value)

    x = pystencils.fields('x: int64[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): product})

    ast = pystencils.create_kernel(assignments)
    code = pystencils.get_code_str(ast)
    kernel = ast.compile()
    print(code)
    assert 'int64_t product' in code

    array = np.zeros((10,), np.int64)
    kernel(x=array)
    assert np.allclose(array, int(expected_value) * np.ones_like(array))
def test_prod_var_limit():
    """A ``Sum`` whose upper limit is a kernel parameter is evaluated at call time."""
    k = pystencils.TypedSymbol('k', create_type('int64'))
    limit = pystencils.TypedSymbol('limit', create_type('int64'))

    series = sympy.Sum(k, (k, 1, limit))
    # Reference value for the limit that is passed to the kernel below.
    expected_value = series.replace(limit, 100).doit()
    print(series)
    print(expected_value)

    x = pystencils.fields('x: int64[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): series})

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast)
    kernel = ast.compile()

    array = np.zeros((10,), np.int64)
    kernel(x=array, limit=100)
    assert np.allclose(array, int(expected_value) * np.ones_like(array))
import pystencils as ps
from pystencils import TypedSymbol
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment
from pystencils.data_types import create_type
from pystencils.transformations import filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy
def test_loop_information():
    """Check loop hierarchy and loop counter symbols reported for a 2D copy kernel."""
    f, g = ps.fields("f, g: double[2D]")
    ast = ps.create_kernel(ps.Assignment(g[0, 0], f[0, 0]))

    loops = filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
    inner_loops = [loop for loop in loops if loop.is_innermost_loop]
    # Both queries start from the first argument of the innermost loop.
    start_node = inner_loops[0].args[0]

    loop_order = list(get_loop_hierarchy(start_node))
    assert loop_order == [0, 1]

    int_type = create_type("int")
    expected_symbols = [TypedSymbol("ctr_1", int_type, nonnegative=True),
                        TypedSymbol("ctr_0", int_type, nonnegative=True)]
    loop_symbols = get_loop_counter_symbol_hierarchy(start_node)
    assert loop_symbols == expected_symbols
from sympy.abc import a, b, c, d, e, f
import pystencils
from pystencils.data_types import cast_func, create_type
def test_type_interference():
x = pystencils.fields('x: float32[3d]')
assignments = pystencils.AssignmentCollection({
a: cast_func(10, create_type('float64')),
b: cast_func(10, create_type('uint16')),
e: 11,
c: b,
f: c + b,
d: c + b + x.center + e,
x.center: c + b + x.center
})
ast = pystencils.create_kernel(assignments)
code = str(pystencils.get_code_str(ast))
assert 'double a' in code
assert 'uint16_t b' in code
assert 'uint16_t f' in code
assert 'int64_t e' in code