diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py index e9bfa1daed825dc76373181ad3634b93c3b82fa3..e8b28a4e5402d9122e6e4cf34c28248ca6e13b6b 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py @@ -123,7 +123,9 @@ with CodeGeneration() as ctx: # GENERATE SWEEPS # ################### - cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True} + # Non-temporal (NT) stores are disabled by default because they do not work in all cases. + # They must be enabled to achieve full potential, for example on AVX512 CPUs. + cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': False} vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index c8f5001743712e239d912820272ddde0b41f2c04..f231da23c9a7118ec455b3eb9c3bd4f0e9aca965 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -97,7 +97,7 @@ with CodeGeneration() as ctx: openmp = True if ctx.openmp else False field_type = "float64" if ctx.double_accuracy else "float32" if ctx.optimize_for_localhost: - cpu_vec = {"nontemporal": True, "assume_aligned": True} + cpu_vec = {"nontemporal": False, "assume_aligned": True} else: cpu_vec = None diff --git a/tests/field/codegen/CodegenPoissonCPU.cpp b/tests/field/codegen/CodegenPoissonCPU.cpp index 6c5696d40debea115698dccfefb466dee10f39b4..582195ca4dbcb446dada56517ee1eda767476178 100644 --- a/tests/field/codegen/CodegenPoissonCPU.cpp +++ b/tests/field/codegen/CodegenPoissonCPU.cpp @@ -109,7 +109,7 @@ void testPoisson() // Registering the sweep timeloop.add() << BeforeFunction( commScheme, "Communication" ) - << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" ); + << Sweep( pystencils::Poisson(fId, fieldID, dx*dx, dy*dy), 
"Poisson Kernel" ); timeloop.run(); diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py index a4202ad5a650ade4be32c2f440977c541e090439..8e27d5f8dcf135c820d3de45d1a943205ac921ca 100644 --- a/tests/field/codegen/Poisson.py +++ b/tests/field/codegen/Poisson.py @@ -6,14 +6,14 @@ from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: field_type = "float64" if ctx.double_accuracy else "float32" # ----- Solving the 2D Poisson equation with rhs -------------------------- - dx = sp.Symbol("dx") - dy = sp.Symbol("dy") + dx2 = sp.Symbol("dx_square") + dy2 = sp.Symbol("dy_square") src, dst, rhs = ps.fields(f"src, src_tmp, rhs: {field_type}[2D]", layout='fzyx') @ps.kernel def kernel_func(): - src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) - + (dx**2 * (src[0, 1] + src[0, -1])) - - (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2)) + src[0, 0] @= ((dy2 * (src[1, 0] + src[-1, 0])) + + (dx2 * (src[0, 1] + src[0, -1])) + - (rhs[0, 0] * dx2 * dy2)) / (2.0 * (dx2 + dy2)) generate_sweep(ctx, 'Poisson', kernel_func)