diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp
index a8ebd370e44f6304ec2f0f6c17c8028b65ff10e1..824ba1f644be17d545762be270f03fa1471e1f68 100644
--- a/tests/cuda/codegen/CodegenJacobiGPU.cpp
+++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp
@@ -78,7 +78,7 @@ void testJacobi2D()
    // Create blocks
    shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
            uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
-           xSize, ySize, uint_t(1),  // how many cells per block (x,y,z)
+           xSize, ySize, uint_t(1),            // how many cells per block (x,y,z)
            real_t(1),                          // dx: length of one cell in physical coordinates
            false,                              // one block per process - "false" means all blocks to one process
            true, true, true );                 // no periodicity
@@ -87,7 +87,8 @@ void testJacobi2D()
    BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
    BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
 
-
+   // Initialize a quarter of the field with ones, the rest remains 0
+   // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
    for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
    {
       auto f = blockIt->getData<ScalarField>( cpuFieldID );
@@ -96,8 +97,6 @@ void testJacobi2D()
             f->get( x, y, 0 ) = 1.0;
    }
 
-
-
    typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
    typedef cuda::communication::GPUPackInfo<GPUField> Packing;
 
@@ -110,7 +109,7 @@ void testJacobi2D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 1.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" );
 
 
    cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
@@ -141,7 +140,8 @@ void testJacobi3D()
    BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU Field" );
    BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" );
 
-
+   // Initialize a quarter of the field with ones, the rest remains 0
+   // Jacobi averages the domain -> every cell should be at 0.25 at sufficiently many timesteps
    for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
    {
       auto f = blockIt->getData<ScalarField>( cpuFieldID );
@@ -151,8 +151,6 @@ void testJacobi3D()
                f->get( x, y, z ) = 1.0;
    }
 
-
-
    typedef blockforest::communication::UniformBufferedScheme<stencil::D3Q7> CommScheme;
    typedef cuda::communication::GPUPackInfo<GPUField> Packing;
 
@@ -165,7 +163,7 @@ void testJacobi3D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::CudaJacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" );
 
 
    cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
diff --git a/tests/cuda/codegen/CodegenPoissonGPU.cpp b/tests/cuda/codegen/CodegenPoissonGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd88bf1fb0a7749921eb48fc9b58006a05d0128d
--- /dev/null
+++ b/tests/cuda/codegen/CodegenPoissonGPU.cpp
@@ -0,0 +1,154 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PoissonGpu.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "PoissonGPU.h"
+
+#include "cuda/HostFieldAllocator.h"
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/Constants.h"
+
+#include "cuda/HostFieldAllocator.h"
+#include "cuda/FieldCopy.h"
+#include "cuda/GPUField.h"
+#include "cuda/Kernel.h"
+#include "cuda/AddGPUFieldToStorage.h"
+#include "cuda/communication/GPUPackInfo.h"
+#include "cuda/FieldIndexing.h"
+
+#include "field/AddToStorage.h"
+#include "field/communication/UniformMPIDatatypeInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
+
+#include "gui/Gui.h"
+
+#include "stencil/D2Q9.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+
+using namespace walberla;
+
+typedef GhostLayerField<real_t, 1> ScalarField_T;
+typedef cuda::GPUField<real_t> GPUField;
+
+
+// U with Dirichlet Boundary
+void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      if( blocks->atDomainYMaxBorder( *block ) )
+      {
+         ScalarField_T * src = block->getData< ScalarField_T >( srcId );
+         CellInterval xyz = src->xyzSizeWithGhostLayer();
+         xyz.yMin() = xyz.yMax();
+         for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+         {
+            const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+            src->get( *cell ) = std::sin(math::pi * p[0] ) * std::sinh(math::pi * p[1] );
+         }
+      }
+   }
+}
+
+// right hand side
+void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      ScalarField_T * f = block->getData< ScalarField_T >( fId );
+      CellInterval xyz = f->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         f->get( *cell ) = 0.0;
+      }
+   }
+}
+
+void testPoisson()
+{
+   const uint_t xCells = uint_t(200);
+   const uint_t yCells = uint_t(100);
+   const real_t xSize = real_t(2);
+   const real_t ySize = real_t(1);
+   const real_t dx = xSize / real_c( xCells + uint_t(1) );
+   const real_t dy = ySize / real_c( yCells + uint_t(1) );
+
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+      math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
+                  xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
+      uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+      xCells, yCells, uint_t(1),          // how many cells per block (x,y,z)
+      false,                              // one block per process - "false" means all blocks to one process
+      false, false, false );              // no periodicity
+
+
+   BlockDataID cpuFieldID = field::addToStorage< ScalarField_T >( blocks, "CPU Field src", real_t(0.0) );
+   BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpuFieldID, "GPU Field src" );
+   initU( blocks, cpuFieldID );
+
+   BlockDataID cpufId = field::addToStorage< ScalarField_T >( blocks, "CPU Field f", real_t(0.0));
+   BlockDataID gpufId = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpufId, "GPU Field f" );
+   initF( blocks, cpufId );
+
+
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
+   typedef cuda::communication::GPUPackInfo<GPUField> Packing;
+
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
+
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(10000);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::PoissonGPU(gpufId, gpuField, dx, dy), "Jacobi Kernel" );
+
+
+   cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpuField, cpuFieldID );
+   cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpufId, cpufId );
+   timeloop.run();
+   cuda::fieldCpy<ScalarField_T, GPUField>( blocks, cpuFieldID, gpuField );
+
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField_T>( cpuFieldID );
+   WALBERLA_CHECK_LESS(f->get(50,99,0) - std::sin( math::pi  * 0.5 ) * std::sinh( math::pi * 0.99 ), 0.01);
+}
+
+
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+
+   testPoisson();
+
+   return 0;
+}
diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py
index 7ec84032a5c7941ddfee67f1484a08dca2014193..f8713d1d13e8336b9ccd0212d5f146067ea38e21 100644
--- a/tests/cuda/codegen/CudaJacobiKernel.py
+++ b/tests/cuda/codegen/CudaJacobiKernel.py
@@ -1,26 +1,25 @@
+import numpy as np
 import sympy as sp
 import pystencils as ps
 from pystencils_walberla import CodeGeneration, generate_sweep
 
 
 with CodeGeneration() as ctx:
-    h = sp.symbols("h")
-
-    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
-    src, dst = ps.fields("src, src_tmp: [2D]")
-    stencil = [[0, -1, 0],
-               [-1, 4, -1],
-               [0, -1, 0]]
-    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4 * h**2)
+    # ----- Stencil 2D - created by specifying weights in nested list --------------------------
+    src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
+    stencil = [[1.11, 2.22, 3.33],
+               [4.44, 5.55, 6.66],
+               [7.77, 8.88, 9.99]]
+    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
     generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")
 
-    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
+    # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format -----
     src, dst = ps.fields("src, src_tmp: [3D]")
 
     @ps.kernel
     def kernel_func():
-        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
-                         src[0, 1, 0] + src[0, -1, 0] +
-                         src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
+        dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] +
+                         5 * src[0, 1, 0] + 6 * src[0, -1, 0] +
+                         7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33
 
     generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
diff --git a/tests/cuda/codegen/CudaPoisson.py b/tests/cuda/codegen/CudaPoisson.py
new file mode 100644
index 0000000000000000000000000000000000000000..53752ee96d463a4021e2c97c24d694fc9887217a
--- /dev/null
+++ b/tests/cuda/codegen/CudaPoisson.py
@@ -0,0 +1,19 @@
+import numpy as np
+import sympy as sp
+import pystencils as ps
+from pystencils_walberla import CodeGeneration, generate_sweep
+
+
+with CodeGeneration() as ctx:
+    # ----- Solving the 2D Poisson equation with rhs --------------------------
+    dx = sp.Symbol("dx")
+    dy = sp.Symbol("dy")
+    src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
+
+    @ps.kernel
+    def kernel_func():
+        src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) +
+                      (dx**2 * (src[0, 1] + src[0, -1])) -
+                      (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
+
+    generate_sweep(ctx, 'PoissonGPU', kernel_func, target='gpu')
diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt
index bcb806a0d899c2e0dad75ed0f42b317bff5733e0..c0091537003048ed7f135181b7a96d55b7a3f15d 100644
--- a/tests/field/CMakeLists.txt
+++ b/tests/field/CMakeLists.txt
@@ -68,4 +68,9 @@ waLBerla_generate_target_from_python(NAME CodegenJacobiCPUGeneratedJacobiKernel
 waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp DEPENDS gui timeloop CodegenJacobiCPUGeneratedJacobiKernel)
 waLBerla_execute_test( NAME CodegenJacobiCPU )
 
+waLBerla_generate_target_from_python(NAME CodegenPoissonGeneratedKernel FILE codegen/Poisson.py
+      OUT_FILES Poisson.cpp Poisson.h )
+waLBerla_compile_test( FILES codegen/CodegenPoisson.cpp DEPENDS gui timeloop CodegenPoissonGeneratedKernel)
+waLBerla_execute_test( NAME CodegenPoisson )
+
 
diff --git a/tests/field/codegen/CodegenJacobiCPU.cpp b/tests/field/codegen/CodegenJacobiCPU.cpp
index aefaf67e2b0152621dad007e3a4c7dfa2422fcde..20bc8b06116b9437de17e5967bf44e83df4100fb 100644
--- a/tests/field/codegen/CodegenJacobiCPU.cpp
+++ b/tests/field/codegen/CodegenJacobiCPU.cpp
@@ -79,12 +79,13 @@ void testJacobi2D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::JacobiKernel2D(fieldID, 1.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::JacobiKernel2D(fieldID), "Jacobi Kernel" );
 
    timeloop.run();
 
    auto firstBlock = blocks->begin();
    auto f = firstBlock->getData<ScalarField>( fieldID );
+
    WALBERLA_CHECK_FLOAT_EQUAL(f->get(0,0,0), real_t(1.0 / 4.0));
 }
 
@@ -127,7 +128,7 @@ void testJacobi3D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::JacobiKernel3D(fieldID, 1.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::JacobiKernel3D(fieldID), "Jacobi Kernel" );
 
    timeloop.run();
 
diff --git a/tests/field/codegen/CodegenPoisson.cpp b/tests/field/codegen/CodegenPoisson.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f5137375a14eecb4bf79e007bef72bc1bbf089f3
--- /dev/null
+++ b/tests/field/codegen/CodegenPoisson.cpp
@@ -0,0 +1,133 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file CodegenPoisson.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "Poisson.h"
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/Constants.h"
+
+#include "field/AddToStorage.h"
+#include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "gui/Gui.h"
+
+#include "stencil/D2Q9.h"
+#include "timeloop/SweepTimeloop.h"
+
+#include "vtk/VTKOutput.h"
+
+
+using namespace walberla;
+
+typedef GhostLayerField<real_t, 1> ScalarField_T;
+
+// U with Dirichlet Boundary
+void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      if( blocks->atDomainYMaxBorder( *block ) )
+      {
+         ScalarField_T * src = block->getData< ScalarField_T >( srcId );
+         CellInterval xyz = src->xyzSizeWithGhostLayer();
+         xyz.yMin() = xyz.yMax();
+         for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+         {
+            const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+            src->get( *cell ) = std::sin( math::pi * p[0] ) * std::sinh( math::pi * p[1] );
+         }
+      }
+   }
+}
+
+// right hand side
+void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      ScalarField_T * f = block->getData< ScalarField_T >( fId );
+      CellInterval xyz = f->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         f->get( *cell ) = 0.0;
+      }
+   }
+}
+
+void testPoisson()
+{
+
+   const uint_t xCells = uint_t(100);
+   const uint_t yCells = uint_t(100);
+   const real_t xSize = real_t(1);
+   const real_t ySize = real_t(1);
+   const real_t dx = xSize / real_c( xCells + uint_t(1) );
+   const real_t dy = ySize / real_c( yCells + uint_t(1) );
+
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
+                       xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
+           uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+           xCells, yCells, uint_t(1),          // how many cells per block (x,y,z)
+           false,                              // one block per process - "false" means all blocks to one process
+           false, false, false );              // no periodicity
+
+
+   BlockDataID fieldID = field::addToStorage<ScalarField_T>(blocks, "Field", real_t(0.0));
+   initU( blocks, fieldID );
+
+   BlockDataID fId = field::addToStorage< ScalarField_T >( blocks, "f", real_t(0.0));
+   initF( blocks, fId );
+
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
+   typedef field::communication::PackInfo<ScalarField_T> Packing;
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
+
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(10000);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" );
+
+   timeloop.run();
+
+   auto firstBlock = blocks->begin();
+   auto f = firstBlock->getData<ScalarField_T>( fieldID );
+   WALBERLA_CHECK_LESS(f->get(50,99,0) - std::sin( math::pi  * 0.5 ) * std::sinh( math::pi * 0.99 ), 0.01);
+}
+
+
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+
+   testPoisson();
+
+   return EXIT_SUCCESS;
+}
diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py
index b2da5369dddee8223a3035bff10174d27f52fe70..3421c8cdd3fc6a0fb88e92ab479fbb2b768a7648 100644
--- a/tests/field/codegen/JacobiKernel.py
+++ b/tests/field/codegen/JacobiKernel.py
@@ -1,26 +1,24 @@
-import sympy as sp
+import numpy as np
 import pystencils as ps
 from pystencils_walberla import CodeGeneration, generate_sweep
 
 
 with CodeGeneration() as ctx:
-    h = sp.symbols("h")
-
-    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
+    # ----- Stencil 2D - created by specifying weights in nested list --------------------------
     src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
-    stencil = [[0, 1, 0],
-               [1, 0, 1],
-               [0, 1, 0]]
-    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2))
+    stencil = [[1.11, 2.22, 3.33],
+               [4.44, 5.55, 6.66],
+               [7.77, 8.88, 9.99]]
+    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
     generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])
 
-    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
-    src, dst = ps.fields("src, src_tmp: [3D]")
+    # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format -----
+    src, dst = ps.fields("src, src_tmp: [3D]", layout='fzyx')
 
     @ps.kernel
     def kernel_func():
-        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
-                         src[0, 1, 0] + src[0, -1, 0] +
-                         src[0, 0, 1] + src[0, 0, -1]) / (h ** 2 * 6)
+        dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] +
+                         5 * src[0, 1, 0] + 6 * src[0, -1, 0] +
+                         7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33
 
     generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])
diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py
new file mode 100644
index 0000000000000000000000000000000000000000..6edec0315778aa2140303fc1158525a9344eb3dd
--- /dev/null
+++ b/tests/field/codegen/Poisson.py
@@ -0,0 +1,18 @@
+import sympy as sp
+import pystencils as ps
+from pystencils_walberla import CodeGeneration, generate_sweep
+
+
+with CodeGeneration() as ctx:
+    # ----- Solving the 2D Poisson equation with rhs --------------------------
+    dx = sp.Symbol("dx")
+    dy = sp.Symbol("dy")
+    src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
+
+    @ps.kernel
+    def kernel_func():
+        src[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) +
+                      (dx**2 * (src[0, 1] + src[0, -1])) -
+                      (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
+
+    generate_sweep(ctx, 'Poisson', kernel_func)