Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 663 additions and 149 deletions
......@@ -2,7 +2,8 @@
import numpy as np
from pystencils import Assignment, Field, FieldType, create_kernel, make_slice
import pystencils as ps
from pystencils import Assignment, Field, FieldType, create_kernel
from pystencils.field import create_numpy_array_with_layout, layout_string_to_tuple
from pystencils.slicing import (
add_ghost_layers, get_ghost_region_slice, get_slice_before_ghost_layer)
......@@ -19,9 +20,9 @@ def _generate_fields(dt=np.uint64, num_directions=1, layout='numpy'):
fields = []
for size in field_sizes:
field_layout = layout_string_to_tuple(layout, len(size))
src_arr = create_numpy_array_with_layout(size, field_layout)
src_arr = create_numpy_array_with_layout(size, field_layout, dtype=dt)
array_data = np.reshape(np.arange(1, int(np.prod(size)+1)), size)
array_data = np.reshape(np.arange(1, int(np.prod(size) + 1)), size)
# Use flat iterator to input data into the array
src_arr.flat = add_ghost_layers(array_data, index_dimensions=1 if num_directions > 1 else 0).astype(dt).flat
dst_arr = np.zeros(src_arr.shape, dtype=dt)
......@@ -40,13 +41,18 @@ def test_full_scalar_field():
field_type=FieldType.BUFFER, dtype=src_arr.dtype)
pack_eqs = [Assignment(buffer.center(), src_field.center())]
pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
code = ps.get_code_str(pack_code)
ps.show_code(pack_code)
pack_kernel = pack_code.compile()
pack_kernel(buffer=buffer_arr, src_field=src_arr)
unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(dst_field=dst_arr, buffer=buffer_arr)
......@@ -70,14 +76,18 @@ def test_field_slice():
field_type=FieldType.BUFFER, dtype=src_arr.dtype)
pack_eqs = [Assignment(buffer.center(), src_field.center())]
pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=bufferArr, src_field=src_arr[pack_slice])
# Unpack into ghost layer of dst_field in N direction
unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=bufferArr, dst_field=dst_arr[unpack_slice])
......@@ -102,7 +112,8 @@ def test_all_cell_values():
eq = Assignment(buffer(idx), src_field(idx))
pack_eqs.append(eq)
pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=bufferArr, src_field=src_arr)
......@@ -112,7 +123,8 @@ def test_all_cell_values():
eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq)
unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
......@@ -138,7 +150,8 @@ def test_subset_cell_values():
eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
pack_eqs.append(eq)
pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=bufferArr, src_field=src_arr)
......@@ -148,7 +161,8 @@ def test_subset_cell_values():
eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
unpack_eqs.append(eq)
unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
......@@ -173,7 +187,8 @@ def test_field_layouts():
eq = Assignment(buffer(idx), src_field(idx))
pack_eqs.append(eq)
pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=bufferArr, src_field=src_arr)
......@@ -183,19 +198,24 @@ def test_field_layouts():
eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq)
unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
def test_iteration_slices():
num_cell_values = 19
fields = _generate_fields(num_directions=num_cell_values)
dt = np.uint64
fields = _generate_fields(dt=dt, num_directions=num_cell_values)
for (src_arr, dst_arr, bufferArr) in fields:
src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
spatial_dimensions = len(src_arr.shape) - 1
# src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
# dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
src_field = Field.create_generic("src_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
dst_field = Field.create_generic("dst_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
field_type=FieldType.BUFFER, dtype=src_arr.dtype)
field_type=FieldType.BUFFER, dtype=src_arr.dtype)
pack_eqs = []
# Since we are packing all cell values for all cells, then
......@@ -207,13 +227,16 @@ def test_iteration_slices():
dim = src_field.spatial_dimensions
# Pack only the leftmost slice, only every second cell
pack_slice = (slice(None, None, 2),) * (dim-1) + (0, )
pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
# Fill the entire array with data
src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
dst_arr.fill(0.0)
dst_arr.fill(0)
config = ps.CreateKernelConfig(iteration_slice=pack_slice,
data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, iteration_slice=pack_slice, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=bufferArr, src_field=src_arr)
......@@ -223,12 +246,14 @@ def test_iteration_slices():
eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq)
unpack_code = create_kernel(unpack_eqs, iteration_slice=pack_slice, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
config = ps.CreateKernelConfig(iteration_slice=pack_slice,
data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
# Check if only every second entry of the leftmost slice has been copied
np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim-1) + (0,)], 0.0)
np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim-1) + (slice(1,None),)], 0.0)
np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)
"""Tests for the (un)packing (from)to buffers on a CUDA GPU."""
from dataclasses import replace
import numpy as np
import pytest
from pystencils import Assignment, Field, FieldType
import pystencils
from pystencils import Assignment, Field, FieldType, Target, CreateKernelConfig, create_kernel, fields
from pystencils.bit_masks import flag_cond
from pystencils.field import create_numpy_array_with_layout, layout_string_to_tuple
from pystencils.gpucuda import create_cuda_kernel, make_python_function
from pystencils.slicing import (
add_ghost_layers, get_ghost_region_slice, get_slice_before_ghost_layer)
from pystencils.stencil import direction_string_to_offset
try:
# noinspection PyUnresolvedReferences
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import cupy as cp
except ImportError:
pass
......@@ -22,7 +23,7 @@ FIELD_SIZES = [(4, 3), (9, 3, 7)]
def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'):
pytest.importorskip('pycuda')
pytest.importorskip('cupy')
field_sizes = FIELD_SIZES
if stencil_directions > 1:
field_sizes = [s + (stencil_directions,) for s in field_sizes]
......@@ -37,10 +38,10 @@ def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'):
src_arr.flat = add_ghost_layers(array_data,
index_dimensions=1 if stencil_directions > 1 else 0).astype(dt).flat
gpu_src_arr = gpuarray.to_gpu(src_arr)
gpu_dst_arr = gpuarray.empty_like(gpu_src_arr)
gpu_src_arr = cp.asarray(src_arr)
gpu_dst_arr = cp.zeros_like(gpu_src_arr)
size = int(np.prod(src_arr.shape))
gpu_buffer_arr = gpuarray.zeros(size, dtype=dt)
gpu_buffer_arr = cp.zeros(size, dtype=dt)
fields.append((src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr))
return fields
......@@ -57,16 +58,20 @@ def test_full_scalar_field():
pack_eqs = [Assignment(buffer.center(), src_field.center())]
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
pack_kernel = make_python_function(pack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=pack_types)
pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
unpack_kernel = make_python_function(unpack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=unpack_types)
unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile()
unpack_kernel(dst_field=gpu_dst_arr, buffer=gpu_buffer_arr)
dst_arr = gpu_dst_arr.get()
......@@ -91,17 +96,21 @@ def test_field_slice():
pack_eqs = [Assignment(buffer.center(), src_field.center())]
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
pack_kernel = make_python_function(pack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=pack_types)
pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr[pack_slice])
# Unpack into ghost layer of dst_field in N direction
unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
unpack_kernel = make_python_function(unpack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=unpack_types)
unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr[unpack_slice])
dst_arr = gpu_dst_arr.get()
......@@ -127,8 +136,11 @@ def test_all_cell_values():
pack_eqs.append(eq)
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
pack_kernel = make_python_function(pack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=pack_types)
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
unpack_eqs = []
......@@ -138,8 +150,10 @@ def test_all_cell_values():
unpack_eqs.append(eq)
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
unpack_kernel = make_python_function(unpack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=unpack_types)
unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
dst_arr = gpu_dst_arr.get()
......@@ -148,7 +162,7 @@ def test_all_cell_values():
def test_subset_cell_values():
"""Tests (un)packing a subset of cell values of the a field (from)to a buffer."""
"""Tests (un)packing a subset of cell values of a field (from)to a buffer."""
num_cell_values = 7
# Cell indices of the field to be (un)packed (from)to the buffer
cell_indices = [1, 3, 5, 6]
......@@ -167,8 +181,9 @@ def test_subset_cell_values():
pack_eqs.append(eq)
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
pack_kernel = make_python_function(pack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=pack_types)
pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
unpack_eqs = []
......@@ -178,8 +193,10 @@ def test_subset_cell_values():
unpack_eqs.append(eq)
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
unpack_kernel = make_python_function(unpack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=unpack_types)
unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
dst_arr = gpu_dst_arr.get()
......@@ -206,8 +223,10 @@ def test_field_layouts():
pack_eqs.append(eq)
pack_types = {'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
pack_code = create_cuda_kernel(pack_eqs, type_info=pack_types)
pack_kernel = make_python_function(pack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=pack_types)
pack_ast = create_kernel(pack_eqs, config=config)
pack_kernel = pack_ast.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
unpack_eqs = []
......@@ -217,6 +236,99 @@ def test_field_layouts():
unpack_eqs.append(eq)
unpack_types = {'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype}
unpack_code = create_cuda_kernel(unpack_eqs, type_info=unpack_types)
unpack_kernel = make_python_function(unpack_code)
config = CreateKernelConfig(target=pystencils.Target.GPU, data_type=unpack_types)
unpack_ast = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_ast.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
def test_buffer_indexing():
src_field, dst_field = fields(f'pdfs_src(19), pdfs_dst(19) :double[3D]')
mask_field = fields(f'mask : uint32 [3D]')
buffer = Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER,
dtype="float64",
index_shape=(19,))
src_field_size = src_field.spatial_shape
mask_field_size = mask_field.spatial_shape
up = Assignment(buffer(0), flag_cond(1, mask_field.center, src_field[0, 1, 0](1)))
iteration_slice = tuple(slice(None, None, 2) for _ in range(3))
config = CreateKernelConfig(target=Target.GPU)
config = replace(config, iteration_slice=iteration_slice, ghost_layers=0)
ast = create_kernel(up, config=config)
parameters = ast.get_parameters()
spatial_shape_symbols = [p.symbol for p in parameters if p.is_field_shape]
# The loop counters as well as the resolved field access should depend on one common spatial shape
if spatial_shape_symbols[0] in mask_field_size:
for s in spatial_shape_symbols:
assert s in mask_field_size
if spatial_shape_symbols[0] in src_field_size:
for s in spatial_shape_symbols:
assert s in src_field_size
assert len(spatial_shape_symbols) <= 3
@pytest.mark.parametrize('gpu_indexing', ("block", "line"))
def test_iteration_slices(gpu_indexing):
num_cell_values = 19
dt = np.uint64
fields = _generate_fields(dt=dt, stencil_directions=num_cell_values)
for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
field_type=FieldType.BUFFER, dtype=src_arr.dtype)
pack_eqs = []
# Since we are packing all cell values for all cells, then
# the buffer index is equivalent to the field index
for idx in range(num_cell_values):
eq = Assignment(buffer(idx), src_field(idx))
pack_eqs.append(eq)
dim = src_field.spatial_dimensions
# Pack only the leftmost slice, only every second cell
pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
# Fill the entire array with data
src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
gpu_src_arr.set(src_arr)
gpu_dst_arr.fill(0)
config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
data_type={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype},
gpu_indexing=gpu_indexing)
pack_code = create_kernel(pack_eqs, config=config)
pack_kernel = pack_code.compile()
pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
unpack_eqs = []
for idx in range(num_cell_values):
eq = Assignment(dst_field(idx), buffer(idx))
unpack_eqs.append(eq)
config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
data_type={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype},
gpu_indexing=gpu_indexing)
unpack_code = create_kernel(unpack_eqs, config=config)
unpack_kernel = unpack_code.compile()
unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
dst_arr = gpu_dst_arr.get()
src_arr = gpu_src_arr.get()
# Check if only every second entry of the leftmost slice has been copied
np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)
......@@ -35,11 +35,11 @@ def add_fixed_constant_boundary_handling(assignments, with_cse):
for a in assignment.rhs.atoms(Field.Access) if not a.is_absolute_access
})) for assignment in assignments.all_assignments]
subs = [{a: ConditionalFieldAccess(a, is_out_of_bound(
sp.Matrix(a.offsets) + x_vector(ndim), common_shape))
for a in assignment.rhs.atoms(Field.Access) if not a.is_absolute_access
} for assignment in assignments.all_assignments]
print(subs)
# subs = [{a: ConditionalFieldAccess(a, is_out_of_bound(
# sp.Matrix(a.offsets) + x_vector(ndim), common_shape))
# for a in assignment.rhs.atoms(Field.Access) if not a.is_absolute_access
# } for assignment in assignments.all_assignments]
# print(subs)
if with_cse:
safe_assignments = sympy_cse(ps.AssignmentCollection(safe_assignments))
......@@ -48,22 +48,20 @@ def add_fixed_constant_boundary_handling(assignments, with_cse):
return ps.AssignmentCollection(safe_assignments)
@pytest.mark.parametrize('dtype', ('float64', 'float32'))
@pytest.mark.parametrize('with_cse', (False, 'with_cse'))
def test_boundary_check(with_cse):
def test_boundary_check(dtype, with_cse):
f, g = ps.fields(f"f, g : {dtype}[2D]")
stencil = ps.Assignment(g[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
f, g = ps.fields("f, g : [2D]")
stencil = ps.Assignment(g[0, 0],
(f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
f_arr = np.random.rand(1000, 1000)
f_arr = np.random.rand(10, 10).astype(dtype=dtype)
g_arr = np.zeros_like(f_arr)
# kernel(f=f_arr, g=g_arr)
assignments = add_fixed_constant_boundary_handling(ps.AssignmentCollection([stencil]), with_cse)
print(assignments)
kernel_checked = ps.create_kernel(assignments, ghost_layers=0).compile()
ps.show_code(kernel_checked)
config = ps.CreateKernelConfig(data_type=dtype, default_number_float=dtype, ghost_layers=0)
kernel_checked = ps.create_kernel(assignments, config=config).compile()
# ps.show_code(kernel_checked)
# No SEGFAULT, please!!
kernel_checked(f=f_arr, g=g_arr)
......@@ -3,37 +3,39 @@ import sympy as sp
import pytest
import pystencils as ps
from pystencils.astnodes import Block, Conditional
from pystencils.alignedarray import aligned_zeros
from pystencils.astnodes import Block, Conditional, SympyAssignment
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.enums import Target
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.node_collection import NodeCollection
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_any(instruction_set, dtype):
if instruction_set in ['sve', 'rvv']:
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 4 # we don't know the actual value
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=np.float64 if dtype == 'double' else np.float32)
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
Conditional(vec_any(data.center() > 0.0), Block([
ps.Assignment(data.center(), 2.0)
]))
SympyAssignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
Conditional(vec_any(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))
]
ast = ps.create_kernel(c, target=ps.Target.CPU,
assignmets = NodeCollection(c)
ast = ps.create_kernel(assignments=assignmets, target=ps.Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'rvv']:
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that the first value has changed
np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
else:
......@@ -41,27 +43,24 @@ def test_vec_any(instruction_set, dtype):
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_all(instruction_set, dtype):
if instruction_set in ['sve', 'rvv']:
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 1000 # we don't know the actual value, need something guaranteed larger than vector
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=np.float64 if dtype == 'double' else np.float32)
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
Conditional(vec_all(data.center() > 0.0), Block([
ps.Assignment(data.center(), 2.0)
]))
]
ast = ps.create_kernel(c, target=Target.CPU,
c = [Conditional(vec_all(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))]
assignmets = NodeCollection(c)
ast = ps.create_kernel(assignmets, target=Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'rvv']:
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that some values in the middle have been replaced
assert np.all(data_arr[3:9, :2] <= 1.0)
assert np.any(data_arr[3:9, 2:] == 2.0)
......@@ -88,26 +87,69 @@ def test_boolean_before_loop():
ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': supported_instruction_sets[-1]})
kernel = ast.compile()
kernel(f=f_arr, g=g_arr, t2=1.0)
print(g)
# print(g)
np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
@pytest.mark.parametrize('aligned', [False, True])
def test_vec_maskstore(instruction_set, dtype, nontemporal, aligned):
data_arr = (aligned_zeros if aligned else np.zeros)((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
Conditional(data.center() < 1.0, Block([
ps.Assignment(data.center(), 2.0)
]))
]
ast = ps.create_kernel(c, target=Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
assignmets = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal,
'assume_aligned': aligned},
default_number_float=dtype)
ast = ps.create_kernel(assignmets, config=config)
if 'maskStore' in ast.instruction_set:
instruction = 'maskStream' if nontemporal and 'maskStream' in ast.instruction_set else (
'maskStoreA' if aligned and 'maskStoreA' in ast.instruction_set else 'maskStore')
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
def test_vec_maskscatter(instruction_set, dtype, nontemporal):
data_arr = np.zeros((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]")
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
assignmets = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal},
default_number_float=dtype)
if 'maskStoreS' not in get_vector_instruction_set(dtype, instruction_set) \
and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn:
ast = ps.create_kernel(assignmets, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0]
else:
with pytest.warns(None) as warn:
ast = ps.create_kernel(assignmets, config=config)
assert len(warn) == 0
instruction = 'maskStreamS' if nontemporal and 'maskStreamS' in ast.instruction_set else 'maskStoreS'
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
np.testing.assert_equal(data_arr[:3, :], 2.0)
......
from collections import defaultdict
import numpy as np
import pytest
from pystencils import CreateKernelConfig, Target, Backend
from pystencils.typing import BasicType
def test_config():
# targets
config = CreateKernelConfig(target=Target.CPU)
assert config.target == Target.CPU
assert config.backend == Backend.C
config = CreateKernelConfig(target=Target.GPU)
assert config.target == Target.GPU
assert config.backend == Backend.CUDA
# typing
config = CreateKernelConfig(data_type=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64, default_number_int=np.int16)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int16')
config = CreateKernelConfig(data_type='float64')
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float64, 'b': np.float32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float32, 'b': np.int32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
def test_config_target_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(target='cpu')
def test_config_backend_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(backend='C')
def test_config_python_types():
with pytest.raises(ValueError):
CreateKernelConfig(data_type=float)
def test_config_python_types2():
with pytest.raises(ValueError):
CreateKernelConfig(data_type={'a': float})
def test_config_python_types3():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float=float)
def test_config_python_types4():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_int=int)
def test_config_python_types5():
with pytest.raises(ValueError):
CreateKernelConfig(data_type="float")
def test_config_python_types6():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float="float")
def test_config_python_types7():
dtype = defaultdict(lambda: 'float', {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types8():
dtype = defaultdict(lambda: float, {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types9():
dtype = defaultdict(lambda: 'float32', {'a': 'float', 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types10():
dtype = defaultdict(lambda: 'float32', {'a': float, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
import numpy as np
import sympy as sp
import pystencils as ps
import pystencils.config
def test_create_kernel_config():
c = pystencils.config.CreateKernelConfig()
assert c.backend == ps.Backend.C
assert c.target == ps.Target.CPU
c = pystencils.config.CreateKernelConfig(target=ps.Target.GPU)
assert c.backend == ps.Backend.CUDA
c = pystencils.config.CreateKernelConfig(backend=ps.Backend.CUDA)
assert c.target == ps.Target.CPU
assert c.backend == ps.Backend.CUDA
def test_kernel_decorator_config():
config = pystencils.config.CreateKernelConfig()
a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))
@ps.kernel_config(config)
def test():
a[0] @= b[0] + c[0]
ps.create_kernel(**test)
def test_kernel_decorator2():
h = sp.symbols("h")
dtype = "float64"
src, dst = ps.fields(f"src, src_tmp: {dtype}[3D]")
@ps.kernel
def kernel_func():
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
+ src[0, 1, 0] + src[0, -1, 0]
+ src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
# assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=2)
ast = ps.create_kernel(kernel_func)
code = ps.get_code_str(ast)
from subprocess import CalledProcessError
import pytest
import sympy
import pystencils
import pystencils.cpu.cpujit
......@@ -25,10 +24,10 @@ class ScreamingGpuBackend(CudaBackend):
def test_custom_backends_cpu():
z, x, y = pystencils.fields("z, y, x: [2d]")
z, y, x = pystencils.fields("z, y, x: [2d]")
normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], [])
z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])
ast = pystencils.create_kernel(normal_assignments, target=Target.CPU)
pystencils.show_code(ast, ScreamingBackend())
......@@ -37,16 +36,16 @@ def test_custom_backends_cpu():
def test_custom_backends_gpu():
pytest.importorskip('pycuda')
import pycuda.driver
import pystencils.gpucuda.cudajit
pytest.importorskip('cupy')
import cupy
import pystencils.gpu.gpujit
z, x, y = pystencils.fields("z, y, x: [2d]")
normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], [])
z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])
ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
pystencils.show_code(ast, ScreamingGpuBackend())
with pytest.raises(pycuda.driver.CompileError):
pystencils.gpucuda.cudajit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)):
pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
......@@ -6,8 +6,7 @@ import numpy as np
import pystencils as ps
from pystencils import create_data_handling, create_kernel
from pystencils.datahandling.pycuda import PyCudaArrayHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.gpu.gpu_array_handler import GPUArrayHandler
from pystencils.enums import Target
try:
......@@ -16,6 +15,12 @@ except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
try:
import cupy.cuda.runtime
device_numbers = range(cupy.cuda.runtime.getDeviceCount())
except ImportError:
device_numbers = []
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
......@@ -86,11 +91,7 @@ def access_and_gather(dh, domain_size):
def synchronization(dh, test_gpu=False):
field_name = 'comm_field_test'
if test_gpu:
try:
from pycuda import driver
import pycuda.autoinit
except ImportError:
return
pytest.importorskip("cupy")
field_name += 'Gpu'
dh.add_array(field_name, ghost_layers=1, dtype=np.int8, cpu=True, gpu=test_gpu)
......@@ -117,7 +118,7 @@ def synchronization(dh, test_gpu=False):
def kernel_execution_jacobi(dh, target):
test_gpu = target == Target.GPU or target == Target.OPENCL
test_gpu = target == Target.GPU
dh.add_array('f', gpu=test_gpu)
dh.add_array('tmp', gpu=test_gpu)
......@@ -133,7 +134,7 @@ def kernel_execution_jacobi(dh, target):
def jacobi():
dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
kernel = create_kernel(jacobi, target=target).compile()
kernel = create_kernel(jacobi, config=ps.CreateKernelConfig(target=target)).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
......@@ -216,22 +217,18 @@ def test_kernel():
reduction(dh)
try:
import pycuda
import cupy
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
kernel_execution_jacobi(dh, Target.GPU)
except ImportError:
pass
@pytest.mark.parametrize('target', (Target.CPU, Target.GPU, Target.OPENCL))
@pytest.mark.parametrize('target', (Target.CPU, Target.GPU))
def test_kernel_param(target):
for domain_shape in [(4, 5), (3, 4, 5)]:
if target == Target.GPU:
pytest.importorskip('pycuda')
if target == Target.OPENCL:
pytest.importorskip('pyopencl')
from pystencils.opencl.opencljit import init_globally
init_globally()
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
kernel_execution_jacobi(dh, target)
......@@ -260,6 +257,20 @@ def test_add_arrays():
assert y == dh.fields['y']
@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)])
@pytest.mark.parametrize('layout', ['zyxf', 'fzyx'])
def test_add_arrays_with_layout(shape, layout):
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU)
f1 = dh.add_array("f1", values_per_cell=19)
dh.fill(f1.name, 1.0)
assert dh.cpu_arrays[f1.name].shape == dh.gpu_arrays[f1.name].shape
assert dh.cpu_arrays[f1.name].strides == dh.gpu_arrays[f1.name].strides
assert dh.cpu_arrays[f1.name].dtype == dh.gpu_arrays[f1.name].dtype
def test_get_kwarg():
domain_shape = (10, 10)
field_description = 'src, dst'
......@@ -270,7 +281,7 @@ def test_get_kwarg():
dh.fill("dst", 0.0, ghost_layers=True)
with pytest.raises(ValueError):
dh.add_array('src')
dh.add_array('src', values_per_cell=1)
ur = ps.Assignment(src.center, dst.center)
kernel = ps.create_kernel(ur).compile()
......@@ -281,22 +292,20 @@ def test_get_kwarg():
def test_add_custom_data():
pytest.importorskip('pycuda')
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # noqa
pytest.importorskip('cupy')
import cupy as cp
def cpu_data_create_func():
return np.ones((2, 2), dtype=np.float64)
def gpu_data_create_func():
return gpuarray.zeros((2, 2), dtype=np.float64)
return cp.zeros((2, 2), dtype=np.float64)
def cpu_to_gpu_transfer_func(gpuarr, cpuarray):
gpuarr.set(cpuarray)
def gpu_to_cpu_transfer_func(gpuarr, cpuarray):
gpuarr.get(cpuarray)
cpuarray[:] = gpuarr.get()
dh = create_data_handling(domain_size=(10, 10))
dh.add_custom_data('custom_data',
......@@ -362,20 +371,11 @@ def test_load_data():
assert np.all(dh.cpu_arrays['dst2']) == 0
@pytest.mark.parametrize('target', (Target.GPU, Target.OPENCL))
def test_array_handler(target):
@pytest.mark.parametrize("device_number", device_numbers)
def test_array_handler(device_number):
size = (2, 2)
if target == Target.GPU:
pytest.importorskip('pycuda')
array_handler = PyCudaArrayHandler()
if target == Target.OPENCL:
pytest.importorskip('pyopencl')
import pyopencl as cl
from pystencils.opencl.opencljit import init_globally
init_globally()
ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)
array_handler = PyOpenClArrayHandler(queue)
pytest.importorskip('cupy')
array_handler = GPUArrayHandler(device_number)
zero_array = array_handler.zeros(size)
cpu_array = np.empty(size)
......@@ -389,8 +389,23 @@ def test_array_handler(target):
empty = array_handler.empty(size)
assert empty.strides == (16, 8)
empty = array_handler.empty(shape=size, layout=(1, 0))
empty = array_handler.empty(shape=size, order="F")
assert empty.strides == (8, 16)
random_array = array_handler.randn(size)
cpu_array = np.empty((20, 40), dtype=np.float64)
gpu_array = array_handler.to_gpu(cpu_array)
assert cpu_array.base is None
assert gpu_array.base is None
assert gpu_array.strides == cpu_array.strides
cpu_array2 = np.empty((20, 40), dtype=np.float64)
cpu_array2 = cpu_array2.swapaxes(0, 1)
gpu_array2 = array_handler.to_gpu(cpu_array2)
assert cpu_array2.base is not None
assert gpu_array2.base is not None
assert gpu_array2.strides == cpu_array2.strides
......@@ -4,7 +4,6 @@ import waLBerla as wlb
import pystencils
from pystencils import make_slice
from tempfile import TemporaryDirectory
from pathlib import Path
from pystencils.boundaries import BoundaryHandling, Neumann
......@@ -12,7 +11,7 @@ from pystencils.slicing import slice_from_direction
from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
from pystencils.datahandling import create_data_handling
from pystencils_tests.test_datahandling import (
from tests.test_datahandling import (
access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output)
SCRIPT_FOLDER = Path(__file__).parent.absolute()
......@@ -34,14 +33,12 @@ def test_access_and_gather():
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
access_and_gather(dh, cells)
synchronization(dh, test_gpu=False)
if hasattr(wlb, 'cuda'):
if hasattr(wlb, 'gpu'):
synchronization(dh, test_gpu=True)
def test_gpu():
if not hasattr(wlb, 'cuda'):
print("Skip GPU tests because walberla was built without CUDA")
return
pytest.importorskip('waLBerla.gpu')
block_size = (4, 7, 1)
num_blocks = (3, 2, 1)
......@@ -59,24 +56,22 @@ def test_gpu():
np.testing.assert_equal(b['v'], 42)
def test_kernel():
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_kernel(target):
if target == pystencils.Target.GPU:
pytest.importorskip('waLBerla.gpu')
for gpu in (True, False):
if gpu and not hasattr(wlb, 'cuda'):
print("Skipping CUDA tests because walberla was built without GPU support")
continue
# 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks)
kernel_execution_jacobi(dh, pystencils.Target.GPU)
reduction(dh)
# 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
# 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2)
kernel_execution_jacobi(dh, pystencils.Target.GPU)
reduction(dh)
# 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
......@@ -90,7 +85,7 @@ def test_block_iteration():
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2)
for b in dh.iterate():
b['v'].fill(1)
......@@ -113,10 +108,12 @@ def test_block_iteration():
def test_getter_setter():
pytest.importorskip('waLBerla.gpu')
block_size = (2, 2, 2)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh = ParallelDataHandling(blocks, default_ghost_layers=2, default_target=pystencils.Target.GPU)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
assert dh.shape == (4, 4, 4)
......@@ -134,15 +131,20 @@ def test_getter_setter():
def test_parallel_datahandling_boundary_conditions():
pytest.importorskip('waLBerla.cuda')
dh = create_data_handling(domain_size=(7, 7), periodicity=True, parallel=True, default_target=pystencils.Target.GPU)
src = dh.add_array('src')
src2 = dh.add_array('src2')
dh.fill("src", 0.0, ghost_layers=True)
dh.fill("src", 1.0, ghost_layers=False)
src_cpu = dh.add_array('src_cpu', gpu=False)
dh.fill("src_cpu", 0.0, ghost_layers=True)
dh.fill("src_cpu", 1.0, ghost_layers=False)
pytest.importorskip('waLBerla.gpu')
dh = create_data_handling(domain_size=(7, 7), periodicity=True, parallel=True,
default_target=pystencils.Target.GPU)
src = dh.add_array('src', values_per_cell=1)
dh.fill(src.name, 0.0, ghost_layers=True)
dh.fill(src.name, 1.0, ghost_layers=False)
src2 = dh.add_array('src2', values_per_cell=1)
src_cpu = dh.add_array('src_cpu', values_per_cell=1, gpu=False)
dh.fill(src_cpu.name, 0.0, ghost_layers=True)
dh.fill(src_cpu.name, 1.0, ghost_layers=False)
boundary_stencil = [(1, 0), (-1, 0), (0, 1), (0, -1)]
boundary_handling_cpu = BoundaryHandling(dh, src_cpu.name, boundary_stencil,
......@@ -165,10 +167,11 @@ def test_parallel_datahandling_boundary_conditions():
boundary_handling()
dh.all_to_cpu()
for block in dh.iterate():
np.testing.assert_almost_equal(block["src_cpu"], block["src"])
np.testing.assert_almost_equal(block[src_cpu.name], block[src.name])
assert dh.custom_data_names == ('boundary_handling_cpuIndexArrays', 'boundary_handling_gpuIndexArrays')
dh.swap("src", "src2", gpu=True)
dh.swap(src.name, src2.name, gpu=True)
def test_save_data():
domain_shape = (2, 2)
......
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.astnodes import Block, Conditional, SympyAssignment
```
%% Cell type:code id: tags:
``` python
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([SympyAssignment(dst[0, 0], src[-1, 0])])
false_block = Block([SympyAssignment(dst[0, 0], src[1, 0])])
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
```
%% Cell type:code id: tags:
``` python
ps.show_code(ast)
```
%% Output
%% Cell type:code id: tags:
``` python
```