Showing with 1258 additions and 6 deletions
import numpy as np
import sympy as sp
import pytest
import pystencils as ps
from pystencils.alignedarray import aligned_zeros
from pystencils.astnodes import Block, Conditional, SympyAssignment
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.enums import Target
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.node_collection import NodeCollection
supported_instruction_sets = get_supported_instruction_sets() or []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_any(instruction_set, dtype):
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 4 # we don't know the actual value
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
SympyAssignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
Conditional(vec_any(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))
]
    assignments = NodeCollection(c)
    ast = ps.create_kernel(assignments=assignments, target=ps.Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that the first value has changed
np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
else:
np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_all(instruction_set, dtype):
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 1000 # we don't know the actual value, need something guaranteed larger than vector
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [Conditional(vec_all(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
    ast = ps.create_kernel(assignments, target=Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that some values in the middle have been replaced
assert np.all(data_arr[3:9, :2] <= 1.0)
assert np.any(data_arr[3:9, 2:] == 2.0)
else:
np.testing.assert_equal(data_arr[3:9, :1], 0.0)
np.testing.assert_equal(data_arr[3:9, 1:width], 1.0)
np.testing.assert_equal(data_arr[3:9, width:2 * width], 2.0)
np.testing.assert_equal(data_arr[3:9, 2 * width:3 * width - 1], 1.0)
np.testing.assert_equal(data_arr[3:9, 3 * width - 1:], 0.0)
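# A scalar reference model for the SIMD semantics exercised above (illustrative
# only, not pystencils API): vec_any/vec_all evaluate the condition once per
# chunk of `width` lanes, so the conditional's store affects whole chunks at a
# time, which is why entire width-sized column blocks flip to 2.0.
def _reference_vec_any(row, width):
    """One bool per width-sized chunk: does any lane satisfy x > 0?"""
    chunks = row[:row.size // width * width].reshape(-1, width)
    return np.any(chunks > 0.0, axis=1)


def _reference_vec_all(row, width):
    """One bool per width-sized chunk: do all lanes satisfy x > 0?"""
    chunks = row[:row.size // width * width].reshape(-1, width)
    return np.all(chunks > 0.0, axis=1)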
@pytest.mark.skipif(not supported_instruction_sets, reason='cannot detect CPU instruction set')
def test_boolean_before_loop():
t1, t2 = sp.symbols('t1, t2')
f_arr = np.ones((10, 10))
g_arr = np.zeros_like(f_arr)
f, g = ps.fields("f, g : double[2D]", f=f_arr, g=g_arr)
a = [
ps.Assignment(t1, t2 > 0),
ps.Assignment(g[0, 0],
sp.Piecewise((f[0, 0], t1), (42, True)))
]
ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': supported_instruction_sets[-1]})
kernel = ast.compile()
kernel(f=f_arr, g=g_arr, t2=1.0)
# print(g)
np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
@pytest.mark.parametrize('aligned', [False, True])
def test_vec_maskstore(instruction_set, dtype, nontemporal, aligned):
data_arr = (aligned_zeros if aligned else np.zeros)((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal,
'assume_aligned': aligned},
default_number_float=dtype)
    ast = ps.create_kernel(assignments, config=config)
if 'maskStore' in ast.instruction_set:
instruction = 'maskStream' if nontemporal and 'maskStream' in ast.instruction_set else (
'maskStoreA' if aligned and 'maskStoreA' in ast.instruction_set else 'maskStore')
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
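    # Expectation: the masked store writes 2.0 exactly where data < 1.0 held,
    # i.e. the 3-cell border, and leaves the 1.0 interior untouched.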
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
def test_vec_maskscatter(instruction_set, dtype, nontemporal):
data_arr = np.zeros((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]")
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal},
default_number_float=dtype)
if 'maskStoreS' not in get_vector_instruction_set(dtype, instruction_set) \
and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn:
            ast = ps.create_kernel(assignments, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0]
else:
with pytest.warns(None) as warn:
            ast = ps.create_kernel(assignments, config=config)
assert len(warn) == 0
instruction = 'maskStreamS' if nontemporal and 'maskStreamS' in ast.instruction_set else 'maskStoreS'
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
from collections import defaultdict
import numpy as np
import pytest
from pystencils import CreateKernelConfig, Target, Backend
from pystencils.typing import BasicType
def test_config():
# targets
config = CreateKernelConfig(target=Target.CPU)
assert config.target == Target.CPU
assert config.backend == Backend.C
config = CreateKernelConfig(target=Target.GPU)
assert config.target == Target.GPU
assert config.backend == Backend.CUDA
# typing
config = CreateKernelConfig(data_type=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64, default_number_int=np.int16)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int16')
config = CreateKernelConfig(data_type='float64')
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float64, 'b': np.float32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float32, 'b': np.int32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
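# Illustrative follow-up (a sketch using only the API exercised above): since
# config.data_type is a defaultdict, a field name that was never listed still
# resolves to the derived default type.
def test_config_data_type_lookup():
    config = CreateKernelConfig(data_type={'a': np.float64, 'b': np.float32})
    # 'c' is a hypothetical, unmapped field name; the defaultdict falls back to
    # its default factory, which the assertions above pin to float64.
    assert config.data_type['c'] == BasicType('float64')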
def test_config_target_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(target='cpu')
def test_config_backend_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(backend='C')
def test_config_python_types():
with pytest.raises(ValueError):
CreateKernelConfig(data_type=float)
def test_config_python_types2():
with pytest.raises(ValueError):
CreateKernelConfig(data_type={'a': float})
def test_config_python_types3():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float=float)
def test_config_python_types4():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_int=int)
def test_config_python_types5():
with pytest.raises(ValueError):
CreateKernelConfig(data_type="float")
def test_config_python_types6():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float="float")
def test_config_python_types7():
dtype = defaultdict(lambda: 'float', {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types8():
dtype = defaultdict(lambda: float, {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types9():
dtype = defaultdict(lambda: 'float32', {'a': 'float', 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types10():
dtype = defaultdict(lambda: 'float32', {'a': float, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
import numpy as np
import sympy as sp
import pystencils as ps
import pystencils.config
def test_create_kernel_config():
c = pystencils.config.CreateKernelConfig()
assert c.backend == ps.Backend.C
assert c.target == ps.Target.CPU
c = pystencils.config.CreateKernelConfig(target=ps.Target.GPU)
assert c.backend == ps.Backend.CUDA
c = pystencils.config.CreateKernelConfig(backend=ps.Backend.CUDA)
assert c.target == ps.Target.CPU
assert c.backend == ps.Backend.CUDA
def test_kernel_decorator_config():
config = pystencils.config.CreateKernelConfig()
a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))
@ps.kernel_config(config)
def test():
a[0] @= b[0] + c[0]
ps.create_kernel(**test)
def test_kernel_decorator2():
h = sp.symbols("h")
dtype = "float64"
src, dst = ps.fields(f"src, src_tmp: {dtype}[3D]")
@ps.kernel
def kernel_func():
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
+ src[0, 1, 0] + src[0, -1, 0]
+ src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
# assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=2)
ast = ps.create_kernel(kernel_func)
code = ps.get_code_str(ast)
from subprocess import CalledProcessError
import pytest
import sympy
import pystencils
import pystencils.cpu.cpujit
from pystencils.backends.cbackend import CBackend
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.enums import Target
class ScreamingBackend(CBackend):
@@ -25,26 +23,29 @@ class ScreamingGpuBackend(CudaBackend):
return normal_code.upper()
def test_custom_backends_cpu():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
        z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.CPU)
    pystencils.show_code(ast, ScreamingBackend())
    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(ast, custom_backend=ScreamingBackend())


def test_custom_backends_gpu():
    pytest.importorskip('cupy')
    import cupy
    import pystencils.gpu.gpujit

    z, y, x = pystencils.fields("z, y, x: [2d]")

    normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
        z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
    pystencils.show_code(ast, ScreamingGpuBackend())
    with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)):
        pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
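# The backend bodies are collapsed in this diff; for context, a minimal sketch
# of such an override (the `_print` hook name is an assumption inferred from
# the `normal_code.upper()` line above, not confirmed by this diff):
class _SketchScreamingBackend(CBackend):
    def _print(self, node):
        normal_code = super()._print(node)
        # Uppercased C still prints fine but no longer compiles, which is what
        # the CalledProcessError / compiler-error checks above rely on.
        return normal_code.upper()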
Binary test-data files added (6 files), including tests/test_data/lenna.png (463 KiB) and tests/test_data/test_vessel2d_mask.png (7.43 KiB).
import os
from tempfile import TemporaryDirectory
from pathlib import Path
import numpy as np
import pystencils as ps
from pystencils import create_data_handling, create_kernel
from pystencils.gpu.gpu_array_handler import GPUArrayHandler
from pystencils.enums import Target
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
try:
import cupy.cuda.runtime
device_numbers = range(cupy.cuda.runtime.getDeviceCount())
except ImportError:
device_numbers = []
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
def basic_iteration(dh):
@@ -17,7 +35,7 @@ def basic_iteration(dh):
def access_and_gather(dh, domain_size):
    dh.add_array('f1', dtype=np.dtype(np.int8))
dh.add_array_like('f2', 'f1')
dh.add_array('v1', values_per_cell=3, dtype=np.int64, ghost_layers=2)
dh.add_array_like('v2', 'v1')
@@ -28,7 +46,7 @@ def access_and_gather(dh, domain_size):
# Check symbolic field properties
assert dh.fields.f1.index_dimensions == 0
assert dh.fields.f1.spatial_dimensions == len(domain_size)
    assert dh.fields.f1.dtype.numpy_dtype == np.int8
assert dh.fields.v1.index_dimensions == 1
assert dh.fields.v1.spatial_dimensions == len(domain_size)
@@ -73,14 +91,10 @@ def access_and_gather(dh, domain_size):
def synchronization(dh, test_gpu=False):
field_name = 'comm_field_test'
    if test_gpu:
        pytest.importorskip("cupy")
        field_name += 'Gpu'

    dh.add_array(field_name, ghost_layers=1, dtype=np.int8, cpu=True, gpu=test_gpu)
# initialize everything with 1
for b in dh.iterate(ghost_layers=1):
@@ -90,8 +104,10 @@ def synchronization(dh, test_gpu=False):
    if test_gpu:
        dh.to_gpu(field_name)

    dh.synchronization_function(field_name, target='gpu' if test_gpu else 'cpu')()

    if test_gpu:
        dh.to_cpu(field_name)
@@ -100,17 +116,16 @@ def synchronization(dh, test_gpu=False):
np.testing.assert_equal(42, b[field_name])
def kernel_execution_jacobi(dh, target):
    test_gpu = target == Target.GPU
dh.add_array('f', gpu=test_gpu)
dh.add_array('tmp', gpu=test_gpu)
if test_gpu:
assert dh.is_on_gpu('f')
assert dh.is_on_gpu('tmp')
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
stencil = stencil_2d if dh.dim == 2 else stencil_3d
@@ -119,7 +134,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
def jacobi():
dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
    kernel = create_kernel(jacobi, config=ps.CreateKernelConfig(target=target)).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
@@ -128,6 +143,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
def vtk_output(dh):
pytest.importorskip('pyevtk')
dh.add_array('scalar_field')
dh.add_array('vector_field', values_per_cell=dh.dim)
dh.add_array('multiple_scalar_field', values_per_cell=9)
@@ -196,18 +212,200 @@ def test_access_and_gather():
def test_kernel():
    for domain_shape in [(4, 5), (3, 4, 5)]:
        dh = create_data_handling(domain_size=domain_shape, periodicity=True)
        assert all(dh.periodicity)
        kernel_execution_jacobi(dh, Target.CPU)
        reduction(dh)

        try:
            import cupy
            dh = create_data_handling(domain_size=domain_shape, periodicity=True)
            kernel_execution_jacobi(dh, Target.GPU)
        except ImportError:
            pass
@pytest.mark.parametrize('target', (Target.CPU, Target.GPU))
def test_kernel_param(target):
for domain_shape in [(4, 5), (3, 4, 5)]:
if target == Target.GPU:
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
pytest.importorskip('pyevtk')
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
vtk_output(dh)
def test_add_arrays():
domain_shape = (3, 4, 5)
field_description = 'x, y(9)'
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=0, default_layout='numpy')
x_, y_ = dh.add_arrays(field_description)
x, y = ps.fields(field_description + ': [3,4,5]')
assert x_ == x
assert y_ == y
assert x == dh.fields['x']
assert y == dh.fields['y']
@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)])
@pytest.mark.parametrize('layout', ['zyxf', 'fzyx'])
def test_add_arrays_with_layout(shape, layout):
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU)
f1 = dh.add_array("f1", values_per_cell=19)
dh.fill(f1.name, 1.0)
assert dh.cpu_arrays[f1.name].shape == dh.gpu_arrays[f1.name].shape
assert dh.cpu_arrays[f1.name].strides == dh.gpu_arrays[f1.name].strides
assert dh.cpu_arrays[f1.name].dtype == dh.gpu_arrays[f1.name].dtype
def test_get_kwarg():
domain_shape = (10, 10)
field_description = 'src, dst'
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
src, dst = dh.add_arrays(field_description)
dh.fill("src", 1.0, ghost_layers=True)
dh.fill("dst", 0.0, ghost_layers=True)
with pytest.raises(ValueError):
dh.add_array('src', values_per_cell=1)
ur = ps.Assignment(src.center, dst.center)
kernel = ps.create_kernel(ur).compile()
kw = dh.get_kernel_kwargs(kernel)
assert np.all(kw[0]['src'] == dh.cpu_arrays['src'])
assert np.all(kw[0]['dst'] == dh.cpu_arrays['dst'])
def test_add_custom_data():
pytest.importorskip('cupy')
import cupy as cp
def cpu_data_create_func():
return np.ones((2, 2), dtype=np.float64)
def gpu_data_create_func():
return cp.zeros((2, 2), dtype=np.float64)
def cpu_to_gpu_transfer_func(gpuarr, cpuarray):
gpuarr.set(cpuarray)
def gpu_to_cpu_transfer_func(gpuarr, cpuarray):
cpuarray[:] = gpuarr.get()
dh = create_data_handling(domain_size=(10, 10))
dh.add_custom_data('custom_data',
cpu_data_create_func,
gpu_data_create_func,
cpu_to_gpu_transfer_func,
gpu_to_cpu_transfer_func)
assert np.all(dh.custom_data_cpu['custom_data'] == 1)
assert np.all(dh.custom_data_gpu['custom_data'].get() == 0)
dh.to_cpu(name='custom_data')
dh.to_gpu(name='custom_data')
assert 'custom_data' in dh.custom_data_names
def test_log():
dh = create_data_handling(domain_size=(10, 10))
dh.log_on_root()
assert dh.is_root
assert dh.world_rank == 0
def test_save_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 1.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 1.0, ghost_layers=True)
dh.save_all(str(INPUT_FOLDER) + '/datahandling_save_test')
def test_load_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 1
assert np.all(dh.cpu_arrays['dst']) == 1
domain_shape = (3, 3)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.add_array("dst2", values_per_cell=9)
dh.fill("dst2", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 0
assert np.all(dh.cpu_arrays['dst']) == 0
assert np.all(dh.cpu_arrays['dst2']) == 0
@pytest.mark.parametrize("device_number", device_numbers)
def test_array_handler(device_number):
size = (2, 2)
pytest.importorskip('cupy')
array_handler = GPUArrayHandler(device_number)
zero_array = array_handler.zeros(size)
cpu_array = np.empty(size)
array_handler.download(zero_array, cpu_array)
assert np.all(cpu_array) == 0
ones_array = array_handler.ones(size)
cpu_array = np.empty(size)
array_handler.download(ones_array, cpu_array)
assert np.all(cpu_array) == 1
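    # A C-contiguous (2, 2) float64 array has byte strides (2 * 8, 8);
    # Fortran order flips them.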
empty = array_handler.empty(size)
assert empty.strides == (16, 8)
empty = array_handler.empty(shape=size, order="F")
assert empty.strides == (8, 16)
random_array = array_handler.randn(size)
cpu_array = np.empty((20, 40), dtype=np.float64)
gpu_array = array_handler.to_gpu(cpu_array)
assert cpu_array.base is None
assert gpu_array.base is None
assert gpu_array.strides == cpu_array.strides
cpu_array2 = np.empty((20, 40), dtype=np.float64)
cpu_array2 = cpu_array2.swapaxes(0, 1)
gpu_array2 = array_handler.to_gpu(cpu_array2)
assert cpu_array2.base is not None
assert gpu_array2.base is not None
assert gpu_array2.strides == cpu_array2.strides
import numpy as np
import waLBerla as wlb
import pystencils
from pystencils import make_slice
from pathlib import Path
from pystencils.boundaries import BoundaryHandling, Neumann
from pystencils.slicing import slice_from_direction
from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
from pystencils.datahandling import create_data_handling
from tests.test_datahandling import (
access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output)
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
def test_access_and_gather():
block_size = (4, 7, 1)
num_blocks = (3, 2, 1)
cells = tuple(a * b for a, b in zip(block_size, num_blocks))
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False,
periodic=(1, 1, 1))
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
access_and_gather(dh, cells)
synchronization(dh, test_gpu=False)
if hasattr(wlb, 'gpu'):
synchronization(dh, test_gpu=True)
def test_gpu():
pytest.importorskip('waLBerla.gpu')
block_size = (4, 7, 1)
num_blocks = (3, 2, 1)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=3, dtype=np.int64, ghost_layers=2, gpu=True)
for b in dh.iterate():
b['v'].fill(42)
dh.all_to_gpu()
for b in dh.iterate():
b['v'].fill(0)
dh.to_cpu('v')
for b in dh.iterate():
np.testing.assert_equal(b['v'], 42)
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_kernel(target):
if target == pystencils.Target.GPU:
pytest.importorskip('waLBerla.gpu')
# 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
# 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks)
vtk_output(dh)
def test_block_iteration():
block_size = (16, 16, 16)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2)
for b in dh.iterate():
b['v'].fill(1)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
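    # Each block iterates 16 + 2 * 2 = 20 cells per dimension including ghost
    # layers, and 2 blocks per dimension give 40**3 cells in total.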
assert s == 40*40*40
sl = make_slice[0:18, 0:18, 0:18]
for b in dh.iterate(slice_obj=sl):
b['v'].fill(0)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
assert s == 40*40*40 - 20*20*20
def test_getter_setter():
pytest.importorskip('waLBerla.gpu')
block_size = (2, 2, 2)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2, default_target=pystencils.Target.GPU)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
assert dh.shape == (4, 4, 4)
assert dh.periodicity == (False, False, False)
assert dh.values_per_cell('v') == 1
assert dh.has_data('v') is True
assert 'v' in dh.array_names
dh.log_on_root()
assert dh.is_root is True
assert dh.world_rank == 0
dh.to_gpu('v')
assert dh.is_on_gpu('v') is True
dh.all_to_cpu()
def test_parallel_datahandling_boundary_conditions():
pytest.importorskip('waLBerla.gpu')
dh = create_data_handling(domain_size=(7, 7), periodicity=True, parallel=True,
default_target=pystencils.Target.GPU)
src = dh.add_array('src', values_per_cell=1)
dh.fill(src.name, 0.0, ghost_layers=True)
dh.fill(src.name, 1.0, ghost_layers=False)
src2 = dh.add_array('src2', values_per_cell=1)
src_cpu = dh.add_array('src_cpu', values_per_cell=1, gpu=False)
dh.fill(src_cpu.name, 0.0, ghost_layers=True)
dh.fill(src_cpu.name, 1.0, ghost_layers=False)
boundary_stencil = [(1, 0), (-1, 0), (0, 1), (0, -1)]
boundary_handling_cpu = BoundaryHandling(dh, src_cpu.name, boundary_stencil,
name="boundary_handling_cpu", target=pystencils.Target.CPU)
boundary_handling = BoundaryHandling(dh, src.name, boundary_stencil,
name="boundary_handling_gpu", target=pystencils.Target.GPU)
neumann = Neumann()
for d in ('N', 'S', 'W', 'E'):
boundary_handling.set_boundary(neumann, slice_from_direction(d, dim=2))
boundary_handling_cpu.set_boundary(neumann, slice_from_direction(d, dim=2))
boundary_handling.prepare()
boundary_handling_cpu.prepare()
boundary_handling_cpu()
dh.all_to_gpu()
boundary_handling()
dh.all_to_cpu()
for block in dh.iterate():
np.testing.assert_almost_equal(block[src_cpu.name], block[src.name])
assert dh.custom_data_names == ('boundary_handling_cpuIndexArrays', 'boundary_handling_gpuIndexArrays')
dh.swap(src.name, src2.name, gpu=True)
def test_save_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1, parallel=True)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 1.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 1.0, ghost_layers=True)
dh.save_all(str(INPUT_FOLDER) + '/datahandling_parallel_save_test')
def test_load_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1, parallel=True)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_parallel_load_test')
    assert np.all(dh.gather_array('src')) == 1
    assert np.all(dh.gather_array('dst')) == 1
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.astnodes import Block, Conditional, SympyAssignment
```
%% Cell type:code id: tags:
``` python
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([SympyAssignment(dst[0, 0], src[-1, 0])])
false_block = Block([SympyAssignment(dst[0, 0], src[1, 0])])
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
```
%% Cell type:code id: tags:
``` python
ps.show_code(ast)
```
%% Output
%% Cell type:code id: tags:
``` python
```
import pystencils as ps
from pystencils.astnodes import Block, Conditional, SympyAssignment
def test_dot_print():
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([SympyAssignment(dst[0, 0], src[-1, 0])])
false_block = Block([SympyAssignment(dst[0, 0], src[1, 0])])
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
import pytest
import sympy as sp
import pystencils as ps
@@ -6,13 +7,15 @@ from pystencils.fast_approximation import (
def test_fast_sqrt():
pytest.importorskip('cupy')
f, g = ps.fields("f, g: double[2D]")
expr = sp.sqrt(f[0, 0] + f[1, 0])
assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
    ast_gpu = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target=ps.Target.GPU)
    ast_gpu.compile()

    code_str = ps.get_code_str(ast_gpu)
assert '__fsqrt_rn' in code_str
expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
@@ -20,12 +23,14 @@ def test_fast_sqrt():
ac = ps.AssignmentCollection([expr], [])
assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
    ast_gpu = ps.create_kernel(insert_fast_sqrts(ac), target=ps.Target.GPU)
    ast_gpu.compile()

    code_str = ps.get_code_str(ast_gpu)
assert '__frsqrt_rn' in code_str
def test_fast_divisions():
pytest.importorskip('cupy')
f, g = ps.fields("f, g: double[2D]")
expr = f[0, 0] / f[1, 0]
assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1
@@ -33,6 +38,7 @@ def test_fast_divisions():
expr = 1 / f[0, 0] * 2 / f[0, 1]
assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1
    ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_divisions(expr)), target=ps.Target.GPU)
    ast.compile()
    code_str = ps.get_code_str(ast)
assert '__fdividef' in code_str
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.fd.derivation import *
```
%% Cell type:markdown id: tags:
# 2D standard stencils
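The first derivation below should recover the classic second-order central difference for $\partial_x^2$,
$$f''(x) \approx \frac{f(x-h) - 2 f(x) + f(x+h)}{h^2},$$
i.e. the one-dimensional stencil $[1, -2, 1]$ embedded in a $3 \times 3$ neighborhood.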
%% Cell type:code id: tags:
``` python
stencil = [(-1, 0), (1, 0), (0, -1), (0, 1), (0, 0)]
standard_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
f = ps.fields("f: [2D]")
standard_2d_00_res = standard_2d_00.get_stencil()
res = standard_2d_00_res.apply(f.center)
expected = f[-1, 0] - 2 * f[0, 0] + f[1, 0]
assert res == expected
```
%% Cell type:code id: tags:
``` python
assert standard_2d_00_res.accuracy == 2
assert not standard_2d_00_res.is_isotropic
standard_2d_00_res
```
%% Output
Finite difference stencil of accuracy 2, isotropic error: False
%% Cell type:code id: tags:
``` python
standard_2d_00.get_stencil().as_array()
```
%% Output
$\displaystyle \left[\begin{matrix}0 & 0 & 0\\1 & -2 & 1\\0 & 0 & 0\end{matrix}\right]$
[[0, 0, 0], [1, -2, 1], [0, 0, 0]]
%% Cell type:markdown id: tags:
# 2D isotropic stencils
## second x-derivative
%% Cell type:code id: tags:
``` python
stencil = [(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1)]
isotropic_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
isotropic_2d_00_res = isotropic_2d_00.get_stencil(isotropic=True)
assert isotropic_2d_00_res.is_isotropic
assert isotropic_2d_00_res.accuracy == 2
isotropic_2d_00_res
```
%% Output
Finite difference stencil of accuracy 2, isotropic error: True
%% Cell type:code id: tags:
``` python
isotropic_2d_00_res.as_array()
```
%% Output
$\displaystyle \left[\begin{matrix}\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\\\frac{5}{6} & - \frac{5}{3} & \frac{5}{6}\\\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\end{matrix}\right]$
[[1/12, -1/6, 1/12], [5/6, -5/3, 5/6], [1/12, -1/6, 1/12]]
%% Cell type:code id: tags:
``` python
plt.figure(figsize=(2,2))
isotropic_2d_00_res.visualize()
```
%% Output
%% Cell type:code id: tags:
``` python
expected_result = sp.Array([[1, -2, 1], [10, -20, 10], [1, -2, 1]]) / 12
assert expected_result == isotropic_2d_00_res.as_array()
```
%% Cell type:markdown id: tags:
## Isotropic laplacian
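Summing the isotropic $\partial_x^2$ and $\partial_y^2$ stencils should yield the well-known isotropic nine-point Laplacian $\frac{1}{6}\begin{pmatrix}1 & 4 & 1\\4 & -20 & 4\\1 & 4 & 1\end{pmatrix}$, which the cells below verify.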
%% Cell type:code id: tags:
``` python
isotropic_2d_11 = FiniteDifferenceStencilDerivation((1,1), stencil)
isotropic_2d_11_res = isotropic_2d_11.get_stencil(isotropic=True)
iso_laplacian = isotropic_2d_00_res.as_array() + isotropic_2d_11_res.as_array()
iso_laplacian
```
%% Output
$\displaystyle \left[\begin{matrix}\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\\\frac{2}{3} & - \frac{10}{3} & \frac{2}{3}\\\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\end{matrix}\right]$
[[1/6, 2/3, 1/6], [2/3, -10/3, 2/3], [1/6, 2/3, 1/6]]
%% Cell type:code id: tags:
``` python
expected_result = sp.Array([[1, 4, 1], [4, -20, 4], [1, 4, 1]]) / 6
assert iso_laplacian == expected_result
```
%% Cell type:markdown id: tags:
# stencils for staggered fields
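Staggered stencils are evaluated at cell faces rather than cell centers, so their points carry half-integer offsets (the $\pm\tfrac{1}{2}$ entries constructed below).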
%% Cell type:code id: tags:
``` python
half = sp.Rational(1, 2)
fd_points_ex = (
(half, 0),
(-half, 0),
(half, 1),
(half, -1),
(-half, 1),
(-half, -1)
)
assert set(fd_points_ex) == set(FiniteDifferenceStaggeredStencilDerivation("E", 2).stencil)
fd_points_ey = (
(0, half),
(0, -half),
(-1,-half),
(-1, half),
(1, -half),
(1, half)
)
assert set(fd_points_ey) == set(FiniteDifferenceStaggeredStencilDerivation("N",2).stencil)
fd_points_c = (
(half, half),
(-half, -half),
(half, -half),
(-half, half)
)
assert set(fd_points_c) == set(FiniteDifferenceStaggeredStencilDerivation("NE",2).stencil)
assert len(FiniteDifferenceStaggeredStencilDerivation("E",3).points) == 10
assert len(FiniteDifferenceStaggeredStencilDerivation("NE",3).points) == 12
assert len(FiniteDifferenceStaggeredStencilDerivation("TNE",3).points) == 8
```
%% Cell type:code id: tags:
``` python
c = ps.fields("c: [2D]")
c3 = ps.fields("c3: [3D]")
assert FiniteDifferenceStaggeredStencilDerivation("E", 2, (0,)).apply(c.center) == c[1, 0] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 2, (0,)).apply(c.center) == c[0, 0] - c[-1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 2, (1,)).apply(c.center) == c[0, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 2, (1,)).apply(c.center) == c[0, 0] - c[0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(c3.center) == c3[1, 0, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 3, (0,)).apply(c3.center) == c3[0, 0, 0] - c3[-1, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 3, (1,)).apply(c3.center) == c3[0, 1, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 3, (1,)).apply(c3.center) == c3[0, 0, 0] - c3[0, -1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("T", 3, (2,)).apply(c3.center) == c3[0, 0, 1] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("B", 3, (2,)).apply(c3.center) == c3[0, 0, 0] - c3[0, 0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("S", 2, (0,)).apply(c.center) == \
(c[1, 0] + c[1, -1] - c[-1, 0] - c[-1, -1])/4
assert FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0,)).apply(c.center) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 2, (1,)).apply(c.center) == c[1, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("NE", 3, (0,)).apply(c3.center) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 3, (1,)).apply(c3.center) == c3[1, 1, 0] - c3[0, 0, 0]
```
%% Cell type:code id: tags:
``` python
d = FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0, 1))
assert d.apply(c.center) == c[0,0] + c[1,1] - c[1,0] - c[0,1]
d.visualize()
```
%% Output
%% Cell type:code id: tags:
``` python
v3 = ps.fields("v(3): [3D]")
for i in range(*v3.index_shape):
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(v3.center_vector[i]) == \
v3[1,0,0](i) - v3[0,0,0](i)
```