diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 94f3477290ecf79fcd8a095389a752d1505d67e2..aca5e5da1e36774dbd7b69b1a0f0f57012bc4dd6 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -298,7 +298,7 @@ class CBackend: return node.get_code(self._dialect, self._vector_instruction_set) def _print_SourceCodeComment(self, node): - return "/* " + node.text + " */" + return f"/* {node.text } */" def _print_EmptyLine(self, node): return "" @@ -316,7 +316,7 @@ class CBackend: result = f"if ({condition_expr})\n{true_block} " if node.false_block: false_block = self._print_Block(node.false_block) - result += "else " + false_block + result += f"else {false_block}" return result @@ -336,7 +336,7 @@ class CustomSympyPrinter(CCodePrinter): return self._typed_number(expr.evalf(), get_type_of_expression(expr)) if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8: - return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")" + return f"({self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False))})" elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0: return f"1 / ({self._print(sp.Mul(*([expr.base] * -expr.exp), evaluate=False))})" else: @@ -589,9 +589,6 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter): result = self.instruction_set['&'].format(result, item) return result - def _print_Max(self, expr): - return "test" - def _print_Or(self, expr): result = self._scalarFallback('_print_Or', expr) if result: diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py index b43cfb4ededa3ea5c84ff2eef9d434d602f358b2..2d7dc579e932aee814e5207801971246122a9a88 100644 --- a/pystencils/backends/cuda_backend.py +++ b/pystencils/backends/cuda_backend.py @@ -104,7 +104,6 @@ class CudaSympyPrinter(CustomSympyPrinter): assert len(expr.args) == 1, f"__fsqrt_rn has one argument, but {len(expr.args)} where given" return f"__fsqrt_rn({self._print(expr.args[0])})" elif isinstance(expr, fast_inv_sqrt): - print(len(expr.args) == 1) assert len(expr.args) == 1, f"__frsqrt_rn has one argument, but {len(expr.args)} where given" return f"__frsqrt_rn({self._print(expr.args[0])})" return super()._print_Function(expr) diff --git a/pystencils/datahandling/datahandling_interface.py b/pystencils/datahandling/datahandling_interface.py index 272f1660ce1c2a6623fb1bb069b37b16d68042f3..0eb101815ec39788cb2b1b9e720f2628bf087edf 100644 --- a/pystencils/datahandling/datahandling_interface.py +++ b/pystencils/datahandling/datahandling_interface.py @@ -86,6 +86,13 @@ class DataHandling(ABC): Args: description (str): String description of the fields to add dtype: data type of the array as numpy data type + ghost_layers: number of ghost layers - if not specified a default value specified in the constructor + is used + layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'. + this is only important if values_per_cell > 1 + cpu: allocate field on the CPU + gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu' + alignment: either False for no alignment, or the number of bytes to align to Returns: Fields representing the just created arrays """ @@ -200,6 +207,10 @@ class DataHandling(ABC): directly passed to the kernel function and override possible parameters from the DataHandling """ + @abstractmethod + def get_kernel_kwargs(self, kernel_function, **kwargs): + """Returns the input arguments of a kernel""" + @abstractmethod def swap(self, name1, name2, gpu=False): """Swaps data of two arrays""" diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py index 7352951d22393d58ca6f6bef52522e5760ba71cb..ce4629f6ab7f35c96cec437ab76b778dfec83e04 100644 --- a/pystencils/datahandling/serial_datahandling.py +++ b/pystencils/datahandling/serial_datahandling.py @@ -266,10 +266,10 @@ class SerialDataHandling(DataHandling): return name in self.gpu_arrays def synchronization_function_cpu(self, names, stencil_name=None, **_): - return self.synchronization_function(names, stencil_name, 'cpu') + return self.synchronization_function(names, stencil_name, target='cpu') def synchronization_function_gpu(self, names, stencil_name=None, **_): - return self.synchronization_function(names, stencil_name, 'gpu') + return self.synchronization_function(names, stencil_name, target='gpu') def synchronization_function(self, names, stencil=None, target=None, **_): if target is None: @@ -425,14 +425,15 @@ class SerialDataHandling(DataHandling): np.savez_compressed(file, **self.cpu_arrays) def load_all(self, file): + if '.npz' not in file: + file += '.npz' file_contents = np.load(file) for arr_name, arr_contents in self.cpu_arrays.items(): if arr_name not in file_contents: print(f"Skipping read data {arr_name} because there is no data with this name in data handling") continue if file_contents[arr_name].shape != arr_contents.shape: - print("Skipping read data {} because shapes don't match. " - "Read array shape {}, existing array shape {}".format(arr_name, file_contents[arr_name].shape, - arr_contents.shape)) + print(f"Skipping read data {arr_name} because shapes don't match. " + f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}") continue np.copyto(arr_contents, file_contents[arr_name]) diff --git a/pystencils/stencil.py b/pystencils/stencil.py index 359a0a29492c537957b8e24a952fa7e052ee898f..28d179c50d2716e4aa1530b2409abb77e6b10e66 100644 --- a/pystencils/stencil.py +++ b/pystencils/stencil.py @@ -34,6 +34,8 @@ def is_valid(stencil, max_neighborhood=None): True >>> is_valid([(2, 0), (1, 0)], max_neighborhood=1) False + >>> is_valid([(2, 0), (1, 0)], max_neighborhood=2) + True """ expected_dim = len(stencil[0]) for d in stencil: @@ -67,8 +69,11 @@ def have_same_entries(s1, s2): Examples: >>> stencil1 = [(1, 0), (-1, 0), (0, 1), (0, -1)] >>> stencil2 = [(-1, 0), (0, -1), (1, 0), (0, 1)] + >>> stencil3 = [(-1, 0), (0, -1), (1, 0)] >>> have_same_entries(stencil1, stencil2) True + >>> have_same_entries(stencil1, stencil3) + False """ if len(s1) != len(s2): return False diff --git a/pystencils/sympyextensions.py b/pystencils/sympyextensions.py index 55fe74967f6deedeac45233dc198f562d072b485..31e42224ab19f14895e99d7ba9685b5c33a2ef54 100644 --- a/pystencils/sympyextensions.py +++ b/pystencils/sympyextensions.py @@ -481,7 +481,7 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]], pass elif t.func is sp.Mul: if check_type(t): - result['muls'] += len(t.args) + result['muls'] += len(t.args) - 1 for a in t.args: if a == 1 or a == -1: result['muls'] -= 1 @@ -509,7 +509,8 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]], if t.exp >= 0: result['muls'] += int(t.exp) - 1 else: - result['muls'] -= 1 + if result['muls'] > 0: + result['muls'] -= 1 result['divs'] += 1 result['muls'] += (-int(t.exp)) - 1 elif sp.nsimplify(t.exp) == sp.Rational(1, 2): diff --git a/pystencils/transformations.py b/pystencils/transformations.py index b3f9431bbf3035aaabd25f6eb430c738dddaf3a7..5e306f2de168994575562a92156408f128ab447c 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -1206,13 +1206,13 @@ def get_loop_hierarchy(ast_node): return reversed(result) -def get_loop_counter_symbol_hierarchy(astNode): +def get_loop_counter_symbol_hierarchy(ast_node): """Determines the loop counter symbols around a given AST node. - :param astNode: the AST node + :param ast_node: the AST node :return: list of loop counter symbols, where the first list entry is the symbol of the innermost loop """ result = [] - node = astNode + node = ast_node while node is not None: node = get_next_parent_of_type(node, ast.LoopOverCoordinate) if node: diff --git a/pystencils_tests/test_astnodes.py b/pystencils_tests/test_astnodes.py new file mode 100644 index 0000000000000000000000000000000000000000..98c755efd4d5b57e0b33f47ffe5d7fb2e11df573 --- /dev/null +++ b/pystencils_tests/test_astnodes.py @@ -0,0 +1,100 @@ +import sympy as sp +import pystencils as ps + +from pystencils import Assignment +from pystencils.astnodes import Block, SkipIteration, LoopOverCoordinate, SympyAssignment +from sympy.codegen.rewriting import optims_c99 + +dst = ps.fields('dst(8): double[2D]') +s = sp.symbols('s_:8') +x = sp.symbols('x') +y = sp.symbols('y') + + +def test_kernel_function(): + assignments = [ + Assignment(dst[0, 0](0), s[0]), + Assignment(x, dst[0, 0](2)) + ] + + ast_node = ps.create_kernel(assignments) + + assert ast_node.target == 'cpu' + assert ast_node.backend == 'c' + # symbols_defined and undefined_symbols will always return an emtpy set + assert ast_node.symbols_defined == set() + assert ast_node.undefined_symbols == set() + assert ast_node.fields_written == {dst} + assert ast_node.fields_read == {dst} + + +def test_skip_iteration(): + # skip iteration is an object which should give back empty data structures. + skipped = SkipIteration() + assert skipped.args == [] + assert skipped.symbols_defined == set() + assert skipped.undefined_symbols == set() + + +def test_block(): + assignments = [ + Assignment(dst[0, 0](0), s[0]), + Assignment(x, dst[0, 0](2)) + ] + bl = Block(assignments) + assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x} + + bl.append([Assignment(y, 10)]) + assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x, y} + assert len(bl.args) == 3 + + list_iterator = iter([Assignment(s[1], 11)]) + bl.insert_front(list_iterator) + + assert bl.args[0] == Assignment(s[1], 11) + + +def test_loop_over_coordinate(): + assignments = [ + Assignment(dst[0, 0](0), s[0]), + Assignment(x, dst[0, 0](2)) + ] + + body = Block(assignments) + loop = LoopOverCoordinate(body, coordinate_to_loop_over=0, start=0, stop=10, step=1) + + assert loop.body == body + + new_body = Block([assignments[0]]) + loop = loop.new_loop_with_different_body(new_body) + assert loop.body == new_body + + assert loop.start == 0 + assert loop.stop == 10 + assert loop.step == 1 + + loop.replace(loop.start, 2) + loop.replace(loop.stop, 20) + loop.replace(loop.step, 2) + + assert loop.start == 2 + assert loop.stop == 20 + assert loop.step == 2 + + +def test_sympy_assignment(): + + assignment = SympyAssignment(dst[0, 0](0), sp.log(x + 3) / sp.log(2) + sp.log(x ** 2 + 1)) + assignment.optimize(optims_c99) + + ast = ps.create_kernel([assignment]) + code = ps.get_code_str(ast) + + assert 'log1p' in code + assert 'log2' in code + + assignment.replace(assignment.lhs, dst[0, 0](1)) + assignment.replace(assignment.rhs, sp.log(2)) + + assert assignment.lhs == dst[0, 0](1) + assert assignment.rhs == sp.log(2) diff --git a/pystencils_tests/test_data/datahandling_load_test.npz b/pystencils_tests/test_data/datahandling_load_test.npz new file mode 100644 index 0000000000000000000000000000000000000000..d363a8a0aba1bb78a06314a19b887eb4c4975334 Binary files /dev/null and b/pystencils_tests/test_data/datahandling_load_test.npz differ diff --git a/pystencils_tests/test_data/datahandling_save_test.npz b/pystencils_tests/test_data/datahandling_save_test.npz new file mode 100644 index 0000000000000000000000000000000000000000..d363a8a0aba1bb78a06314a19b887eb4c4975334 Binary files /dev/null and b/pystencils_tests/test_data/datahandling_save_test.npz differ diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py index 7f95fe1f04d1bbf72770d2837bf6723681c1f50d..949f067896d3374a7fcb9ecb1369fe5a37eb1f9a 100644 --- a/pystencils_tests/test_datahandling.py +++ b/pystencils_tests/test_datahandling.py @@ -1,5 +1,6 @@ import os from tempfile import TemporaryDirectory +from pathlib import Path import numpy as np @@ -12,6 +13,9 @@ except ImportError: import unittest.mock pytest = unittest.mock.MagicMock() +SCRIPT_FOLDER = Path(__file__).parent.absolute() +INPUT_FOLDER = SCRIPT_FOLDER / "test_data" + def basic_iteration(dh): dh.add_array('basic_iter_test_gl_default') @@ -111,6 +115,11 @@ def kernel_execution_jacobi(dh, target): test_gpu = target == 'gpu' or target == 'opencl' dh.add_array('f', gpu=test_gpu) dh.add_array('tmp', gpu=test_gpu) + + if test_gpu: + assert dh.is_on_gpu('f') + assert dh.is_on_gpu('tmp') + stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)] stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)] stencil = stencil_2d if dh.dim == 2 else stencil_3d @@ -197,6 +206,7 @@ def test_access_and_gather(): def test_kernel(): for domain_shape in [(4, 5), (3, 4, 5)]: dh = create_data_handling(domain_size=domain_shape, periodicity=True) + assert all(dh.periodicity) kernel_execution_jacobi(dh, 'cpu') reduction(dh) @@ -243,3 +253,105 @@ def test_add_arrays(): assert y_ == y assert x == dh.fields['x'] assert y == dh.fields['y'] + + +def test_get_kwarg(): + domain_shape = (10, 10) + field_description = 'src, dst' + + dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1) + src, dst = dh.add_arrays(field_description) + dh.fill("src", 1.0, ghost_layers=True) + dh.fill("dst", 0.0, ghost_layers=True) + + with pytest.raises(ValueError): + dh.add_array('src') + + ur = ps.Assignment(src.center, dst.center) + kernel = ps.create_kernel(ur).compile() + + kw = dh.get_kernel_kwargs(kernel) + assert np.all(kw[0]['src'] == dh.cpu_arrays['src']) + assert np.all(kw[0]['dst'] == dh.cpu_arrays['dst']) + + +def test_add_custom_data(): + pytest.importorskip('pycuda') + + import pycuda.gpuarray as gpuarray + import pycuda.autoinit # noqa + + def cpu_data_create_func(): + return np.ones((2, 2), dtype=np.float64) + + def gpu_data_create_func(): + return gpuarray.zeros((2, 2), dtype=np.float64) + + def cpu_to_gpu_transfer_func(gpuarr, cpuarray): + gpuarr.set(cpuarray) + + def gpu_to_cpu_transfer_func(gpuarr, cpuarray): + gpuarr.get(cpuarray) + + dh = create_data_handling(domain_size=(10, 10)) + dh.add_custom_data('custom_data', + cpu_data_create_func, + gpu_data_create_func, + cpu_to_gpu_transfer_func, + gpu_to_cpu_transfer_func) + + assert np.all(dh.custom_data_cpu['custom_data'] == 1) + assert np.all(dh.custom_data_gpu['custom_data'].get() == 0) + + dh.to_cpu(name='custom_data') + dh.to_gpu(name='custom_data') + + assert 'custom_data' in dh.custom_data_names + + +def test_log(): + dh = create_data_handling(domain_size=(10, 10)) + dh.log_on_root() + assert dh.is_root + assert dh.world_rank == 0 + + +def test_save_data(): + domain_shape = (2, 2) + + dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1) + dh.add_array("src", values_per_cell=9) + dh.fill("src", 1.0, ghost_layers=True) + dh.add_array("dst", values_per_cell=9) + dh.fill("dst", 1.0, ghost_layers=True) + + dh.save_all(str(INPUT_FOLDER) + '/datahandling_save_test') + + +def test_load_data(): + domain_shape = (2, 2) + + dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1) + dh.add_array("src", values_per_cell=9) + dh.fill("src", 0.0, ghost_layers=True) + dh.add_array("dst", values_per_cell=9) + dh.fill("dst", 0.0, ghost_layers=True) + + dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test') + assert np.all(dh.cpu_arrays['src']) == 1 + assert np.all(dh.cpu_arrays['dst']) == 1 + + domain_shape = (3, 3) + + dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1) + dh.add_array("src", values_per_cell=9) + dh.fill("src", 0.0, ghost_layers=True) + dh.add_array("dst", values_per_cell=9) + dh.fill("dst", 0.0, ghost_layers=True) + dh.add_array("dst2", values_per_cell=9) + dh.fill("dst2", 0.0, ghost_layers=True) + + dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test') + assert np.all(dh.cpu_arrays['src']) == 0 + assert np.all(dh.cpu_arrays['dst']) == 0 + assert np.all(dh.cpu_arrays['dst2']) == 0 diff --git a/pystencils_tests/test_datahandling_parallel.py b/pystencils_tests/test_datahandling_parallel.py index 0bfbfbd02fc86952e7fba84ce6aa080d2080ec0a..efe106dc0090bf69dff0d187008c1a8653c4a98e 100644 --- a/pystencils_tests/test_datahandling_parallel.py +++ b/pystencils_tests/test_datahandling_parallel.py @@ -1,10 +1,17 @@ import numpy as np import waLBerla as wlb +from pystencils import make_slice from pystencils.datahandling.parallel_datahandling import ParallelDataHandling from pystencils_tests.test_datahandling import ( access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output) +try: + import pytest +except ImportError: + import unittest.mock + pytest = unittest.mock.MagicMock() + def test_access_and_gather(): block_size = (4, 7, 1) @@ -64,3 +71,51 @@ def test_vtk_output(): blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False) dh = ParallelDataHandling(blocks) vtk_output(dh) + + +def test_block_iteration(): + block_size = (16, 16, 16) + num_blocks = (2, 2, 2) + blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False) + dh = ParallelDataHandling(blocks, default_ghost_layers=2) + dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True) + + for b in dh.iterate(): + b['v'].fill(1) + + s = 0 + for b in dh.iterate(): + s += np.sum(b['v']) + + assert s == 40*40*40 + + sl = make_slice[0:18, 0:18, 0:18] + for b in dh.iterate(slice_obj=sl): + b['v'].fill(0) + + s = 0 + for b in dh.iterate(): + s += np.sum(b['v']) + + assert s == 40*40*40 - 20*20*20 + + +def test_getter_setter(): + block_size = (2, 2, 2) + num_blocks = (2, 2, 2) + blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False) + dh = ParallelDataHandling(blocks, default_ghost_layers=2) + dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True) + + assert dh.shape == (4, 4, 4) + assert dh.periodicity == (False, False, False) + assert dh.values_per_cell('v') == 1 + assert dh.has_data('v') is True + assert 'v' in dh.array_names + dh.log_on_root() + assert dh.is_root is True + assert dh.world_rank == 0 + + dh.to_gpu('v') + assert dh.is_on_gpu('v') is True + dh.all_to_cpu() diff --git a/pystencils_tests/test_fast_approximation.py b/pystencils_tests/test_fast_approximation.py index f4d19fa19fa615aaa76a7b1bb445fa7bdb886237..ccd2d7b8ea15a4c36069a58bc1197b314cce2ca1 100644 --- a/pystencils_tests/test_fast_approximation.py +++ b/pystencils_tests/test_fast_approximation.py @@ -11,9 +11,9 @@ def test_fast_sqrt(): assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1 assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1 - ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu') - ast.compile() - code_str = ps.get_code_str(ast) + ast_gpu = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu') + ast_gpu.compile() + code_str = ps.get_code_str(ast_gpu) assert '__fsqrt_rn' in code_str expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0])) @@ -21,9 +21,9 @@ def test_fast_sqrt(): ac = ps.AssignmentCollection([expr], []) assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1 - ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu') - ast.compile() - code_str = ps.get_code_str(ast) + ast_gpu = ps.create_kernel(insert_fast_sqrts(ac), target='gpu') + ast_gpu.compile() + code_str = ps.get_code_str(ast_gpu) assert '__frsqrt_rn' in code_str diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py index 653ed34d90e6ecd45a7a5785fb71d522cc3734f5..aeb4b7acb6f62730f0896b10d6b8fa6d655c871d 100644 --- a/pystencils_tests/test_kerncraft_coupling.py +++ b/pystencils_tests/test_kerncraft_coupling.py @@ -7,7 +7,7 @@ from kerncraft.kernel import KernelCode from kerncraft.machinemodel import MachineModel from kerncraft.models import ECM, ECMData, Benchmark -from pystencils import Assignment, Field +from pystencils import Assignment, Field, fields from pystencils.cpu import create_kernel from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark @@ -159,3 +159,15 @@ def test_benchmark(): timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1) np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4) + + +@pytest.mark.kerncraft +def test_kerncraft_generic_field(): + a = fields('a: double[3D]') + b = fields('b: double[3D]') + s = sp.Symbol("s") + rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1] + + update_rule = Assignment(b[0, 0, 0], s * rhs) + ast = create_kernel([update_rule]) + k = PyStencilsKerncraftKernel(ast, debug_print=True) diff --git a/pystencils_tests/test_simplification_strategy.py b/pystencils_tests/test_simplification_strategy.py index 189482c006197160e1f49771dc534206f8d0ef9e..5176ae5f49aa45ed952882cb0313d5e5f7754177 100644 --- a/pystencils_tests/test_simplification_strategy.py +++ b/pystencils_tests/test_simplification_strategy.py @@ -1,5 +1,6 @@ import sympy as sp +import pystencils as ps from pystencils import Assignment, AssignmentCollection from pystencils.simp import ( SimplificationStrategy, apply_on_all_subexpressions, @@ -43,3 +44,39 @@ def test_simplification_strategy(): assert 'Adds' in report._repr_html_() assert 'factor' in str(strategy) + + +def test_split_inner_loop(): + dst = ps.fields('dst(8): double[2D]') + s = sp.symbols('s_:8') + x = sp.symbols('x') + subexpressions = [] + main = [ + Assignment(dst[0, 0](0), s[0]), + Assignment(dst[0, 0](1), s[1]), + Assignment(dst[0, 0](2), s[2]), + Assignment(dst[0, 0](3), s[3]), + Assignment(dst[0, 0](4), s[4]), + Assignment(dst[0, 0](5), s[5]), + Assignment(dst[0, 0](6), s[6]), + Assignment(dst[0, 0](7), s[7]), + Assignment(x, sum(s)) + ] + ac = AssignmentCollection(main, subexpressions) + split_groups = [[dst[0, 0](0), dst[0, 0](1)], + [dst[0, 0](2), dst[0, 0](3)], + [dst[0, 0](4), dst[0, 0](5)], + [dst[0, 0](6), dst[0, 0](7), x]] + ac.simplification_hints['split_groups'] = split_groups + ast = ps.create_kernel(ac) + + code = ps.get_code_str(ast) + # we have four inner loops as indicated in split groups (4 elements) plus one outer loop + assert code.count('for') == 5 + + ac = AssignmentCollection(main, subexpressions) + ast = ps.create_kernel(ac) + + code = ps.get_code_str(ast) + # one inner loop and one outer loop + assert code.count('for') == 2 diff --git a/pystencils_tests/test_slicing.py b/pystencils_tests/test_slicing.py new file mode 100644 index 0000000000000000000000000000000000000000..79e36576bfb622cae3f4f9ed865a8b2f8308430b --- /dev/null +++ b/pystencils_tests/test_slicing.py @@ -0,0 +1,73 @@ +import numpy as np +from pystencils import create_data_handling +from pystencils.slicing import SlicedGetter, make_slice, SlicedGetterDataHandling, shift_slice, slice_intersection + + +def test_sliced_getter(): + def get_slice(slice_obj=None): + arr = np.ones((10, 10)) + if slice_obj is None: + slice_obj = make_slice[:, :] + + return arr[slice_obj] + + sli = SlicedGetter(get_slice) + + test = make_slice[2:-2, 2:-2] + assert sli[test].shape == (6, 6) + + +def test_sliced_getter_data_handling(): + domain_shape = (10, 10) + + dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1) + dh.add_array("src", values_per_cell=1) + dh.fill("src", 1.0, ghost_layers=True) + + dh.add_array("dst", values_per_cell=1) + dh.fill("dst", 0.0, ghost_layers=True) + + sli = SlicedGetterDataHandling(dh, 'dst') + slice_obj = make_slice[2:-2, 2:-2] + assert np.sum(sli[slice_obj]) == 0 + + sli = SlicedGetterDataHandling(dh, 'src') + slice_obj = make_slice[2:-2, 2:-2] + assert np.sum(sli[slice_obj]) == 36 + + +def test_shift_slice(): + + sh = shift_slice(make_slice[2:-2, 2:-2], [1, 2]) + assert sh[0] == slice(3, -1, None) + assert sh[1] == slice(4, 0, None) + + sh = shift_slice(make_slice[2:-2, 2:-2], 1) + assert sh[0] == slice(3, -1, None) + assert sh[1] == slice(3, -1, None) + + sh = shift_slice([2, 4], 1) + assert sh[0] == 3 + assert sh[1] == 5 + + sh = shift_slice([2, None], 1) + assert sh[0] == 3 + assert sh[1] is None + + sh = shift_slice([1.5, 1.5], 1) + assert sh[0] == 1.5 + assert sh[1] == 1.5 + + +def test_slice_intersection(): + sl1 = make_slice[1:10, 1:10] + sl2 = make_slice[5:15, 5:15] + + intersection = slice_intersection(sl1, sl2) + assert intersection[0] == slice(5, 10, None) + assert intersection[1] == slice(5, 10, None) + + sl2 = make_slice[12:15, 12:15] + + intersection = slice_intersection(sl1, sl2) + assert intersection is None diff --git a/pystencils_tests/test_staggered_kernel.py b/pystencils_tests/test_staggered_kernel.py index c4f491393d2b4d08fbb9b4c99356abdc7e7199b6..eecbf706c5538ad544d29a6b37366944aa504680 100644 --- a/pystencils_tests/test_staggered_kernel.py +++ b/pystencils_tests/test_staggered_kernel.py @@ -4,6 +4,7 @@ import sympy as sp import pytest import pystencils as ps +from pystencils import x_staggered_vector, TypedSymbol class TestStaggeredDiffusion: @@ -96,3 +97,12 @@ def test_staggered_loop_cutting(): assignments = [ps.Assignment(j.staggered_access("SW"), 1)] ast = ps.create_staggered_kernel(assignments, target=dh.default_target) assert not ast.atoms(ps.astnodes.Conditional) + + +def test_staggered_vector(): + dim = 2 + v = x_staggered_vector(dim) + ctr0 = TypedSymbol('ctr_0', 'int', nonnegative=True) + ctr1 = TypedSymbol('ctr_1', 'int', nonnegative=True) + expected_result = sp.Matrix(tuple((ctr0 + 0.5, ctr1 + 0.5))) + assert v == expected_result \ No newline at end of file diff --git a/pystencils_tests/test_stencils.py b/pystencils_tests/test_stencils.py index bf1418e053d41ebeb80b6ae623f1834b24a8a713..fa205ad7f04a446e1e16d5ea95e679cea70ecde4 100644 --- a/pystencils_tests/test_stencils.py +++ b/pystencils_tests/test_stencils.py @@ -1 +1,30 @@ import pystencils as ps +import sympy as sp + +from pystencils.stencil import coefficient_list, plot_expression + + +def test_coefficient_list(): + f = ps.fields("f: double[1D]") + expr = 2 * f[1] + 3 * f[-1] + coff = coefficient_list(expr) + assert coff == [3, 0, 2] + plot_expression(expr, matrix_form=True) + + f = ps.fields("f: double[3D]") + expr = 2 * f[1, 0, 0] + 3 * f[0, -1, 0] + coff = coefficient_list(expr) + assert coff == [[[0, 3, 0], [0, 0, 2], [0, 0, 0]]] + + expr = 2 * f[1, 0, 0] + 3 * f[0, -1, 0] + 4 * f[0, 0, 1] + coff = coefficient_list(expr, matrix_form=True) + assert coff[0] == sp.zeros(3, 3) + + # in 3D plot only works if there are entries on every of the three 2D planes. In the above examples z-1 was empty + expr = 2 * f[1, 0, 0] + 1 * f[0, -1, 0] + 1 * f[0, 0, 1] + f[0, 0, -1] + plot_expression(expr) + + +def test_plot_expression(): + f = ps.fields("f: double[2D]") + plot_expression(2 * f[1, 0] + 3 * f[0, -1], matrix_form=True) \ No newline at end of file diff --git a/pystencils_tests/test_sympyextensions.py b/pystencils_tests/test_sympyextensions.py index 1636df632670a4a909321999067389bdaaa56a62..2135ee88e7ee5d9375ce5213a2bdb953bea3d5b8 100644 --- a/pystencils_tests/test_sympyextensions.py +++ b/pystencils_tests/test_sympyextensions.py @@ -119,7 +119,7 @@ def test_count_operations(): expr = sympy.Pow(1/x + y * sympy.sqrt(z), 100) ops = count_operations(expr, only_type=None) assert ops['adds'] == 1 - assert ops['muls'] == 100 + assert ops['muls'] == 99 assert ops['divs'] == 1 assert ops['sqrts'] == 1 diff --git a/pystencils/test_type_interference.py b/pystencils_tests/test_type_interference.py similarity index 100% rename from pystencils/test_type_interference.py rename to pystencils_tests/test_type_interference.py diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index 1fa1812eb8fced19d114ef3cb32900ec9b93f995..3cc1be4f36b126baf19d074657b4394cdd2cefbb 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -4,6 +4,7 @@ import sympy as sp import pystencils as ps from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets from pystencils.cpu.vectorization import vectorize +from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions from pystencils.transformations import replace_inner_stride_with_one @@ -109,7 +110,6 @@ def test_piecewise1(): def test_piecewise2(): - arr = np.zeros((20, 20)) @ps.kernel @@ -128,7 +128,6 @@ def test_piecewise2(): def test_piecewise3(): - arr = np.zeros((22, 22)) @ps.kernel @@ -146,12 +145,32 @@ def test_logical_operators(): arr = np.zeros((22, 22)) @ps.kernel - def test_kernel(s): + def kernel_and(s): f, g = ps.fields(f=arr, g=arr) s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0) g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True]) - ast = ps.create_kernel(test_kernel) + ast = ps.create_kernel(kernel_and) + vectorize(ast) + ast.compile() + + @ps.kernel + def kernel_or(s): + f, g = ps.fields(f=arr, g=arr) + s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0) + g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True]) + + ast = ps.create_kernel(kernel_or) + vectorize(ast) + ast.compile() + + @ps.kernel + def kernel_equal(s): + f, g = ps.fields(f=arr, g=arr) + s.c @= sp.Eq(f[0, 1], 2.0) + g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True]) + + ast = ps.create_kernel(kernel_equal) vectorize(ast) ast.compile() @@ -159,3 +178,61 @@ def test_logical_operators(): def test_hardware_query(): instruction_sets = get_supported_instruction_sets() assert 'sse' in instruction_sets + + +def test_vectorised_pow(): + arr = np.zeros((24, 24)) + f, g = ps.fields(f=arr, g=arr) + + as1 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 2)) + as2 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 0.5)) + as3 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -0.5)) + as4 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 4)) + as5 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -4)) + as6 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -1)) + + ast = ps.create_kernel(as1) + vectorize(ast) + ast.compile() + + ast = ps.create_kernel(as2) + vectorize(ast) + ast.compile() + + ast = ps.create_kernel(as3) + vectorize(ast) + ast.compile() + + ast = ps.create_kernel(as4) + vectorize(ast) + ast.compile() + + ast = ps.create_kernel(as5) + vectorize(ast) + ast.compile() + + ast = ps.create_kernel(as6) + vectorize(ast) + ast.compile() + + +def test_vectorised_fast_approximations(): + arr = np.zeros((24, 24)) + f, g = ps.fields(f=arr, g=arr) + + expr = sp.sqrt(f[0, 0] + f[1, 0]) + assignment = ps.Assignment(g[0, 0], insert_fast_sqrts(expr)) + ast = ps.create_kernel(assignment) + vectorize(ast) + ast.compile() + + expr = f[0, 0] / f[1, 0] + assignment = ps.Assignment(g[0, 0], insert_fast_divisions(expr)) + ast = ps.create_kernel(assignment) + vectorize(ast) + ast.compile() + + assignment = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0])) + ast = ps.create_kernel(insert_fast_sqrts(assignment)) + vectorize(ast) + ast.compile()