From 9159b1f5c139a993f896a32d8e47f95b585103de Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Tue, 5 Jul 2022 07:41:56 +0200 Subject: [PATCH] Minor Fixes for Codegen applications --- .../PhaseFieldAllenCahn/multiphase_codegen.py | 4 +++- apps/benchmarks/UniformGridCPU/UniformGridCPU.py | 2 +- tests/field/codegen/CodegenPoissonCPU.cpp | 2 +- tests/field/codegen/Poisson.py | 10 +++++----- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py index e9bfa1dae..e8b28a4e5 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py @@ -123,7 +123,9 @@ with CodeGeneration() as ctx: # GENERATE SWEEPS # ################### - cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True} + # by default NT Stores are deactivated because they do not work in all cases + # must be activated to achieve full potential for example on AVX512 CPUs + cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': False} vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index c8f500174..f231da23c 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -97,7 +97,7 @@ with CodeGeneration() as ctx: openmp = True if ctx.openmp else False field_type = "float64" if ctx.double_accuracy else "float32" if ctx.optimize_for_localhost: - cpu_vec = {"nontemporal": True, "assume_aligned": True} + cpu_vec = {"nontemporal": False, "assume_aligned": True} else: cpu_vec = None diff --git a/tests/field/codegen/CodegenPoissonCPU.cpp b/tests/field/codegen/CodegenPoissonCPU.cpp index 6c5696d40..582195ca4 100644 --- a/tests/field/codegen/CodegenPoissonCPU.cpp +++ b/tests/field/codegen/CodegenPoissonCPU.cpp @@ -109,7 +109,7 @@ void testPoisson() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" ); + << Sweep( pystencils::Poisson(fId, fieldID, dx*dx, dy*dy), "Poisson Kernel" ); timeloop.run(); diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py index a4202ad5a..8e27d5f8d 100644 --- a/tests/field/codegen/Poisson.py +++ b/tests/field/codegen/Poisson.py @@ -6,14 +6,14 @@ from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: field_type = "float64" if ctx.double_accuracy else "float32" # ----- Solving the 2D Poisson equation with rhs -------------------------- - dx = sp.Symbol("dx") - dy = sp.Symbol("dy") + dx2 = sp.Symbol("dx_square") + dy2 = sp.Symbol("dy_square") src, dst, rhs = ps.fields(f"src, src_tmp, rhs: {field_type}[2D]", layout='fzyx') @ps.kernel def kernel_func(): - src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) - + (dx**2 * (src[0, 1] + src[0, -1])) - - (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + src[0, 0] @= ((dy2 * (src[1, 0] + src[-1, 0])) + + (dx2 * (src[0, 1] + src[0, -1])) + - (rhs[0, 0] * dx2 * dy2)) / (2.0 * (dx2 + dy2)) generate_sweep(ctx, 'Poisson', kernel_func) -- GitLab