From 9159b1f5c139a993f896a32d8e47f95b585103de Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Tue, 5 Jul 2022 07:41:56 +0200
Subject: [PATCH] Minor Fixes for Codegen applications

---
 .../PhaseFieldAllenCahn/multiphase_codegen.py          |  4 +++-
 apps/benchmarks/UniformGridCPU/UniformGridCPU.py       |  2 +-
 tests/field/codegen/CodegenPoissonCPU.cpp              |  2 +-
 tests/field/codegen/Poisson.py                         | 10 +++++-----
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
index e9bfa1dae..e8b28a4e5 100644
--- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
+++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
@@ -123,7 +123,9 @@ with CodeGeneration() as ctx:
     # GENERATE SWEEPS #
     ###################
 
-    cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True}
+    # Non-temporal (NT) stores are deactivated by default because they do not work in all cases.
+    # They must be activated to achieve full potential, for example on AVX512 CPUs.
+    cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': False}
 
     vp = [('int32_t', 'cudaBlockSize0'),
           ('int32_t', 'cudaBlockSize1'),
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
index c8f500174..f231da23c 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
@@ -97,7 +97,7 @@ with CodeGeneration() as ctx:
     openmp = True if ctx.openmp else False
     field_type = "float64" if ctx.double_accuracy else "float32"
     if ctx.optimize_for_localhost:
-        cpu_vec = {"nontemporal": True, "assume_aligned": True}
+        cpu_vec = {"nontemporal": False, "assume_aligned": True}
     else:
         cpu_vec = None
 
diff --git a/tests/field/codegen/CodegenPoissonCPU.cpp b/tests/field/codegen/CodegenPoissonCPU.cpp
index 6c5696d40..582195ca4 100644
--- a/tests/field/codegen/CodegenPoissonCPU.cpp
+++ b/tests/field/codegen/CodegenPoissonCPU.cpp
@@ -109,7 +109,7 @@ void testPoisson()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" );
+                  << Sweep( pystencils::Poisson(fId, fieldID, dx*dx, dy*dy), "Poisson Kernel" );
 
    timeloop.run();
 
diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py
index a4202ad5a..8e27d5f8d 100644
--- a/tests/field/codegen/Poisson.py
+++ b/tests/field/codegen/Poisson.py
@@ -6,14 +6,14 @@ from pystencils_walberla import CodeGeneration, generate_sweep
 with CodeGeneration() as ctx:
     field_type = "float64" if ctx.double_accuracy else "float32"
     # ----- Solving the 2D Poisson equation with rhs --------------------------
-    dx = sp.Symbol("dx")
-    dy = sp.Symbol("dy")
+    dx2 = sp.Symbol("dx_square")
+    dy2 = sp.Symbol("dy_square")
     src, dst, rhs = ps.fields(f"src, src_tmp, rhs: {field_type}[2D]", layout='fzyx')
 
     @ps.kernel
     def kernel_func():
-        src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0]))
-                      + (dx**2 * (src[0, 1] + src[0, -1]))
-                      - (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
+        src[0, 0] @= ((dy2 * (src[1, 0] + src[-1, 0]))
+                      + (dx2 * (src[0, 1] + src[0, -1]))
+                      - (rhs[0, 0] * dx2 * dy2)) / (2.0 * (dx2 + dy2))
 
     generate_sweep(ctx, 'Poisson', kernel_func)
-- 
GitLab