Commit a3f37cbd authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge remote-tracking branch 'remotes/origin/Extend_testsuite' into Extend_testsuite

parents 7aa67902 4465a595
......@@ -298,7 +298,7 @@ class CBackend:
return node.get_code(self._dialect, self._vector_instruction_set)
def _print_SourceCodeComment(self, node):
return "/* " + node.text + " */"
return f"/* {node.text } */"
def _print_EmptyLine(self, node):
return ""
......@@ -316,7 +316,7 @@ class CBackend:
result = f"if ({condition_expr})\n{true_block} "
if node.false_block:
false_block = self._print_Block(node.false_block)
result += "else " + false_block
result += f"else {false_block}"
return result
......@@ -336,7 +336,7 @@ class CustomSympyPrinter(CCodePrinter):
return self._typed_number(expr.evalf(), get_type_of_expression(expr))
if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
return f"({self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False))})"
elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
return f"1 / ({self._print(sp.Mul(*([expr.base] * -expr.exp), evaluate=False))})"
else:
......@@ -589,9 +589,6 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
result = self.instruction_set['&'].format(result, item)
return result
def _print_Max(self, expr):
return "test"
def _print_Or(self, expr):
result = self._scalarFallback('_print_Or', expr)
if result:
......
......@@ -104,7 +104,6 @@ class CudaSympyPrinter(CustomSympyPrinter):
assert len(expr.args) == 1, f"__fsqrt_rn has one argument, but {len(expr.args)} where given"
return f"__fsqrt_rn({self._print(expr.args[0])})"
elif isinstance(expr, fast_inv_sqrt):
print(len(expr.args) == 1)
assert len(expr.args) == 1, f"__frsqrt_rn has one argument, but {len(expr.args)} where given"
return f"__frsqrt_rn({self._print(expr.args[0])})"
return super()._print_Function(expr)
......@@ -86,6 +86,13 @@ class DataHandling(ABC):
Args:
description (str): String description of the fields to add
dtype: data type of the array as numpy data type
ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
is used
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
Fields representing the just created arrays
"""
......@@ -200,6 +207,10 @@ class DataHandling(ABC):
directly passed to the kernel function and override possible parameters from the DataHandling
"""
@abstractmethod
def get_kernel_kwargs(self, kernel_function, **kwargs):
"""Returns the input arguments of a kernel"""
@abstractmethod
def swap(self, name1, name2, gpu=False):
"""Swaps data of two arrays"""
......
......@@ -266,10 +266,10 @@ class SerialDataHandling(DataHandling):
return name in self.gpu_arrays
def synchronization_function_cpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'cpu')
return self.synchronization_function(names, stencil_name, target='cpu')
def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'gpu')
return self.synchronization_function(names, stencil_name, target='gpu')
def synchronization_function(self, names, stencil=None, target=None, **_):
if target is None:
......@@ -425,14 +425,15 @@ class SerialDataHandling(DataHandling):
np.savez_compressed(file, **self.cpu_arrays)
def load_all(self, file):
if '.npz' not in file:
file += '.npz'
file_contents = np.load(file)
for arr_name, arr_contents in self.cpu_arrays.items():
if arr_name not in file_contents:
print(f"Skipping read data {arr_name} because there is no data with this name in data handling")
continue
if file_contents[arr_name].shape != arr_contents.shape:
print("Skipping read data {} because shapes don't match. "
"Read array shape {}, existing array shape {}".format(arr_name, file_contents[arr_name].shape,
arr_contents.shape))
print(f"Skipping read data {arr_name} because shapes don't match. "
f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}")
continue
np.copyto(arr_contents, file_contents[arr_name])
......@@ -34,6 +34,8 @@ def is_valid(stencil, max_neighborhood=None):
True
>>> is_valid([(2, 0), (1, 0)], max_neighborhood=1)
False
>>> is_valid([(2, 0), (1, 0)], max_neighborhood=2)
True
"""
expected_dim = len(stencil[0])
for d in stencil:
......@@ -67,8 +69,11 @@ def have_same_entries(s1, s2):
Examples:
>>> stencil1 = [(1, 0), (-1, 0), (0, 1), (0, -1)]
>>> stencil2 = [(-1, 0), (0, -1), (1, 0), (0, 1)]
>>> stencil3 = [(-1, 0), (0, -1), (1, 0)]
>>> have_same_entries(stencil1, stencil2)
True
>>> have_same_entries(stencil1, stencil3)
False
"""
if len(s1) != len(s2):
return False
......
......@@ -481,7 +481,7 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
pass
elif t.func is sp.Mul:
if check_type(t):
result['muls'] += len(t.args)
result['muls'] += len(t.args) - 1
for a in t.args:
if a == 1 or a == -1:
result['muls'] -= 1
......@@ -509,7 +509,8 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
if t.exp >= 0:
result['muls'] += int(t.exp) - 1
else:
result['muls'] -= 1
if result['muls'] > 0:
result['muls'] -= 1
result['divs'] += 1
result['muls'] += (-int(t.exp)) - 1
elif sp.nsimplify(t.exp) == sp.Rational(1, 2):
......
......@@ -1206,13 +1206,13 @@ def get_loop_hierarchy(ast_node):
return reversed(result)
def get_loop_counter_symbol_hierarchy(astNode):
def get_loop_counter_symbol_hierarchy(ast_node):
"""Determines the loop counter symbols around a given AST node.
:param astNode: the AST node
:param ast_node: the AST node
:return: list of loop counter symbols, where the first list entry is the symbol of the innermost loop
"""
result = []
node = astNode
node = ast_node
while node is not None:
node = get_next_parent_of_type(node, ast.LoopOverCoordinate)
if node:
......
import sympy as sp
import pystencils as ps
from pystencils import Assignment
from pystencils.astnodes import Block, SkipIteration, LoopOverCoordinate, SympyAssignment
from sympy.codegen.rewriting import optims_c99
dst = ps.fields('dst(8): double[2D]')
s = sp.symbols('s_:8')
x = sp.symbols('x')
y = sp.symbols('y')
def test_kernel_function():
assignments = [
Assignment(dst[0, 0](0), s[0]),
Assignment(x, dst[0, 0](2))
]
ast_node = ps.create_kernel(assignments)
assert ast_node.target == 'cpu'
assert ast_node.backend == 'c'
# symbols_defined and undefined_symbols will always return an emtpy set
assert ast_node.symbols_defined == set()
assert ast_node.undefined_symbols == set()
assert ast_node.fields_written == {dst}
assert ast_node.fields_read == {dst}
def test_skip_iteration():
# skip iteration is an object which should give back empty data structures.
skipped = SkipIteration()
assert skipped.args == []
assert skipped.symbols_defined == set()
assert skipped.undefined_symbols == set()
def test_block():
assignments = [
Assignment(dst[0, 0](0), s[0]),
Assignment(x, dst[0, 0](2))
]
bl = Block(assignments)
assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x}
bl.append([Assignment(y, 10)])
assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x, y}
assert len(bl.args) == 3
list_iterator = iter([Assignment(s[1], 11)])
bl.insert_front(list_iterator)
assert bl.args[0] == Assignment(s[1], 11)
def test_loop_over_coordinate():
assignments = [
Assignment(dst[0, 0](0), s[0]),
Assignment(x, dst[0, 0](2))
]
body = Block(assignments)
loop = LoopOverCoordinate(body, coordinate_to_loop_over=0, start=0, stop=10, step=1)
assert loop.body == body
new_body = Block([assignments[0]])
loop = loop.new_loop_with_different_body(new_body)
assert loop.body == new_body
assert loop.start == 0
assert loop.stop == 10
assert loop.step == 1
loop.replace(loop.start, 2)
loop.replace(loop.stop, 20)
loop.replace(loop.step, 2)
assert loop.start == 2
assert loop.stop == 20
assert loop.step == 2
def test_sympy_assignment():
assignment = SympyAssignment(dst[0, 0](0), sp.log(x + 3) / sp.log(2) + sp.log(x ** 2 + 1))
assignment.optimize(optims_c99)
ast = ps.create_kernel([assignment])
code = ps.get_code_str(ast)
assert 'log1p' in code
assert 'log2' in code
assignment.replace(assignment.lhs, dst[0, 0](1))
assignment.replace(assignment.rhs, sp.log(2))
assert assignment.lhs == dst[0, 0](1)
assert assignment.rhs == sp.log(2)
import os
from tempfile import TemporaryDirectory
from pathlib import Path
import numpy as np
......@@ -12,6 +13,9 @@ except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
def basic_iteration(dh):
dh.add_array('basic_iter_test_gl_default')
......@@ -111,6 +115,11 @@ def kernel_execution_jacobi(dh, target):
test_gpu = target == 'gpu' or target == 'opencl'
dh.add_array('f', gpu=test_gpu)
dh.add_array('tmp', gpu=test_gpu)
if test_gpu:
assert dh.is_on_gpu('f')
assert dh.is_on_gpu('tmp')
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
stencil = stencil_2d if dh.dim == 2 else stencil_3d
......@@ -197,6 +206,7 @@ def test_access_and_gather():
def test_kernel():
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
assert all(dh.periodicity)
kernel_execution_jacobi(dh, 'cpu')
reduction(dh)
......@@ -243,3 +253,105 @@ def test_add_arrays():
assert y_ == y
assert x == dh.fields['x']
assert y == dh.fields['y']
def test_get_kwarg():
domain_shape = (10, 10)
field_description = 'src, dst'
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
src, dst = dh.add_arrays(field_description)
dh.fill("src", 1.0, ghost_layers=True)
dh.fill("dst", 0.0, ghost_layers=True)
with pytest.raises(ValueError):
dh.add_array('src')
ur = ps.Assignment(src.center, dst.center)
kernel = ps.create_kernel(ur).compile()
kw = dh.get_kernel_kwargs(kernel)
assert np.all(kw[0]['src'] == dh.cpu_arrays['src'])
assert np.all(kw[0]['dst'] == dh.cpu_arrays['dst'])
def test_add_custom_data():
pytest.importorskip('pycuda')
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # noqa
def cpu_data_create_func():
return np.ones((2, 2), dtype=np.float64)
def gpu_data_create_func():
return gpuarray.zeros((2, 2), dtype=np.float64)
def cpu_to_gpu_transfer_func(gpuarr, cpuarray):
gpuarr.set(cpuarray)
def gpu_to_cpu_transfer_func(gpuarr, cpuarray):
gpuarr.get(cpuarray)
dh = create_data_handling(domain_size=(10, 10))
dh.add_custom_data('custom_data',
cpu_data_create_func,
gpu_data_create_func,
cpu_to_gpu_transfer_func,
gpu_to_cpu_transfer_func)
assert np.all(dh.custom_data_cpu['custom_data'] == 1)
assert np.all(dh.custom_data_gpu['custom_data'].get() == 0)
dh.to_cpu(name='custom_data')
dh.to_gpu(name='custom_data')
assert 'custom_data' in dh.custom_data_names
def test_log():
dh = create_data_handling(domain_size=(10, 10))
dh.log_on_root()
assert dh.is_root
assert dh.world_rank == 0
def test_save_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 1.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 1.0, ghost_layers=True)
dh.save_all(str(INPUT_FOLDER) + '/datahandling_save_test')
def test_load_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 1
assert np.all(dh.cpu_arrays['dst']) == 1
domain_shape = (3, 3)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.add_array("dst2", values_per_cell=9)
dh.fill("dst2", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 0
assert np.all(dh.cpu_arrays['dst']) == 0
assert np.all(dh.cpu_arrays['dst2']) == 0
import numpy as np
import waLBerla as wlb
from pystencils import make_slice
from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
from pystencils_tests.test_datahandling import (
access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output)
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
def test_access_and_gather():
block_size = (4, 7, 1)
......@@ -64,3 +71,51 @@ def test_vtk_output():
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks)
vtk_output(dh)
def test_block_iteration():
block_size = (16, 16, 16)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
for b in dh.iterate():
b['v'].fill(1)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
assert s == 40*40*40
sl = make_slice[0:18, 0:18, 0:18]
for b in dh.iterate(slice_obj=sl):
b['v'].fill(0)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
assert s == 40*40*40 - 20*20*20
def test_getter_setter():
block_size = (2, 2, 2)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
assert dh.shape == (4, 4, 4)
assert dh.periodicity == (False, False, False)
assert dh.values_per_cell('v') == 1
assert dh.has_data('v') is True
assert 'v' in dh.array_names
dh.log_on_root()
assert dh.is_root is True
assert dh.world_rank == 0
dh.to_gpu('v')
assert dh.is_on_gpu('v') is True
dh.all_to_cpu()
......@@ -11,9 +11,9 @@ def test_fast_sqrt():
assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
ast.compile()
code_str = ps.get_code_str(ast)
ast_gpu = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
ast_gpu.compile()
code_str = ps.get_code_str(ast_gpu)
assert '__fsqrt_rn' in code_str
expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
......@@ -21,9 +21,9 @@ def test_fast_sqrt():
ac = ps.AssignmentCollection([expr], [])
assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
ast.compile()
code_str = ps.get_code_str(ast)
ast_gpu = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
ast_gpu.compile()
code_str = ps.get_code_str(ast_gpu)
assert '__frsqrt_rn' in code_str
......
......@@ -7,7 +7,7 @@ from kerncraft.kernel import KernelCode
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, ECMData, Benchmark
from pystencils import Assignment, Field
from pystencils import Assignment, Field, fields
from pystencils.cpu import create_kernel
from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
......@@ -159,3 +159,15 @@ def test_benchmark():
timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)
@pytest.mark.kerncraft
def test_kerncraft_generic_field():
a = fields('a: double[3D]')
b = fields('b: double[3D]')
s = sp.Symbol("s")
rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
update_rule = Assignment(b[0, 0, 0], s * rhs)
ast = create_kernel([update_rule])
k = PyStencilsKerncraftKernel(ast, debug_print=True)
import sympy as sp
import pystencils as ps
from pystencils import Assignment, AssignmentCollection
from pystencils.simp import (
SimplificationStrategy, apply_on_all_subexpressions,
......@@ -43,3 +44,39 @@ def test_simplification_strategy():
assert 'Adds' in report._repr_html_()
assert 'factor' in str(strategy)
def test_split_inner_loop():
dst = ps.fields('dst(8): double[2D]')
s = sp.symbols('s_:8')
x = sp.symbols('x')
subexpressions = []
main = [
Assignment(dst[0, 0](0), s[0]),
Assignment(dst[0, 0](1), s[1]),
Assignment(dst[0, 0](2), s[2]),
Assignment(dst[0, 0](3), s[3]),
Assignment(dst[0, 0](4), s[4]),
Assignment(dst[0, 0](5), s[5]),
Assignment(dst[0, 0](6), s[6]),
Assignment(dst[0, 0](7), s[7]),
Assignment(x, sum(s))
]
ac = AssignmentCollection(main, subexpressions)
split_groups = [[dst[0, 0](0), dst[0, 0](1)],
[dst[0, 0](2), dst[0, 0](3)],
[dst[0, 0](4), dst[0, 0](5)],
[dst[0, 0](6), dst[0, 0](7), x]]
ac.simplification_hints['split_groups'] = split_groups
ast = ps.create_kernel(ac)
code = ps.get_code_str(ast)
# we have four inner loops as indicated in split groups (4 elements) plus one outer loop
assert code.count('for') == 5
ac = AssignmentCollection(main, subexpressions)
ast = ps.create_kernel(ac)
code = ps.get_code_str(ast)
# one inner loop and one outer loop
assert code.count('for') == 2
import numpy as np
from pystencils import create_data_handling
from pystencils.slicing import SlicedGetter, make_slice, SlicedGetterDataHandling, shift_slice, slice_intersection
def test_sliced_getter():