diff --git a/tests/cuda/codegen/CodegenJacobiGPU.cpp b/tests/cuda/codegen/CodegenJacobiGPU.cpp
index d2e719e5a23e0197d83b3f5f929d38960bee592b..824ba1f644be17d545762be270f03fa1471e1f68 100644
--- a/tests/cuda/codegen/CodegenJacobiGPU.cpp
+++ b/tests/cuda/codegen/CodegenJacobiGPU.cpp
@@ -109,7 +109,7 @@ void testJacobi2D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::CudaJacobiKernel2D(gpuField, 2.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::CudaJacobiKernel2D(gpuField), "Jacobi Kernel" );
 
 
    cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
@@ -163,7 +163,7 @@ void testJacobi3D()
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::CudaJacobiKernel3D(gpuField, 1.0), "Jacobi Kernel" );
+                  << Sweep( pystencils::CudaJacobiKernel3D(gpuField), "Jacobi Kernel" );
 
 
    cuda::fieldCpy<GPUField, ScalarField>( blocks, gpuField, cpuFieldID );
diff --git a/tests/cuda/codegen/CodegenPoissonGPU.cpp b/tests/cuda/codegen/CodegenPoissonGPU.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..166e8793ced3fa6b31a9f52e08dd182729573fd4
--- /dev/null
+++ b/tests/cuda/codegen/CodegenPoissonGPU.cpp
@@ -0,0 +1,151 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PoissonGpu.cpp
+//! \author Markus Holzer <markus.holzer@fau.de>
+//
+//======================================================================================================================
+
+#include "PoissonGPU.h"
+
+#include "cuda/HostFieldAllocator.h"
+#include "blockforest/Initialization.h"
+#include "blockforest/communication/UniformDirectScheme.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+
+#include "core/Environment.h"
+#include "core/debug/TestSubsystem.h"
+#include "core/math/Constants.h"
+
+#include "cuda/HostFieldAllocator.h"
+#include "cuda/FieldCopy.h"
+#include "cuda/GPUField.h"
+#include "cuda/Kernel.h"
+#include "cuda/AddGPUFieldToStorage.h"
+#include "cuda/communication/GPUPackInfo.h"
+#include "cuda/FieldIndexing.h"
+
+#include "field/AddToStorage.h"
+#include "field/communication/UniformMPIDatatypeInfo.h"
+#include "field/vtk/VTKWriter.h"
+
+#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h"
+
+#include "gui/Gui.h"
+
+#include "stencil/D2Q9.h"
+
+#include "timeloop/SweepTimeloop.h"
+
+
+using namespace walberla;
+
+typedef GhostLayerField<real_t, 1> ScalarField_T;
+typedef cuda::GPUField<real_t> GPUField;
+
+
+// U with Dirichlet Boundary
+void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      if( blocks->atDomainYMaxBorder( *block ) )
+      {
+         ScalarField_T * src = block->getData< ScalarField_T >( srcId );
+         CellInterval xyz = src->xyzSizeWithGhostLayer();
+         xyz.yMin() = xyz.yMax();
+         for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+         {
+            const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+            src->get( *cell ) = std::sin(math::pi * p[0] ) * std::sinh(math::pi * p[1] );
+         }
+      }
+   }
+}
+
+// right hand side
+void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      ScalarField_T * f = block->getData< ScalarField_T >( fId );
+      CellInterval xyz = f->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+         f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] );
+      }
+   }
+}
+
+void testPoisson()
+{
+   const uint_t xCells = uint_t(200);
+   const uint_t yCells = uint_t(100);
+   const real_t xSize = real_t(2);
+   const real_t ySize = real_t(1);
+   const real_t dx = xSize / real_c( xCells + uint_t(1) );
+   const real_t dy = ySize / real_c( yCells + uint_t(1) );
+
+   // Create blocks
+   shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+      math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
+                  xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
+      uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
+      xCells, yCells, uint_t(1),          // how many cells per block (x,y,z)
+      false,                              // one block per process - "false" means all blocks to one process
+      false, false, false );              // no periodicity
+
+
+   BlockDataID cpuFieldID = field::addToStorage< ScalarField_T >( blocks, "CPU Field src", real_t(0.0) );
+   BlockDataID gpuField = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpuFieldID, "GPU Field src" );
+   initU( blocks, cpuFieldID );
+
+   BlockDataID cpufId = field::addToStorage< ScalarField_T >( blocks, "CPU Field f", real_t(0.0));
+   BlockDataID gpufId = cuda::addGPUFieldToStorage<ScalarField_T>( blocks, cpufId, "GPU Field f" );
+   initF( blocks, cpufId );
+
+
+   typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
+   typedef cuda::communication::GPUPackInfo<GPUField> Packing;
+
+   CommScheme commScheme(blocks);
+   commScheme.addDataToCommunicate( make_shared<Packing>(gpuField) );
+
+   // Create Timeloop
+   const uint_t numberOfTimesteps = uint_t(10000);
+   SweepTimeloop timeloop ( blocks, numberOfTimesteps );
+
+   // Registering the sweep
+   timeloop.add() << BeforeFunction(  commScheme, "Communication" )
+                  << Sweep( pystencils::PoissonGPU(gpufId, gpuField, dx, dy), "Jacobi Kernel" );
+
+
+   cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpuField, cpuFieldID );
+   cuda::fieldCpy<GPUField, ScalarField_T>( blocks, gpufId, cpufId );
+   timeloop.run();
+   cuda::fieldCpy<ScalarField_T, GPUField>( blocks, cpuFieldID, gpuField );
+}
+
+
+int main( int argc, char ** argv )
+{
+   mpi::Environment env( argc, argv );
+   debug::enterTestMode();
+
+   testPoisson();
+
+   return 0;
+}
diff --git a/tests/cuda/codegen/CudaJacobiKernel.py b/tests/cuda/codegen/CudaJacobiKernel.py
index 9d1362066c7684c2ca6af5de48d4dc552097d054..f8713d1d13e8336b9ccd0212d5f146067ea38e21 100644
--- a/tests/cuda/codegen/CudaJacobiKernel.py
+++ b/tests/cuda/codegen/CudaJacobiKernel.py
@@ -1,26 +1,25 @@
+import numpy as np
 import sympy as sp
 import pystencils as ps
 from pystencils_walberla import CodeGeneration, generate_sweep
 
 
 with CodeGeneration() as ctx:
-    h = sp.symbols("h")
-
-    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
+    # ----- Stencil 2D - created by specifying weights in nested list --------------------------
     src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
-    stencil = [[0, 4, 0],
-               [4, 0, 4],
-               [0, 4, 0]]
-    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / (4 * h ** 2))
+    stencil = [[1.11, 2.22, 3.33],
+               [4.44, 5.55, 6.66],
+               [7.77, 8.88, 9.99]]
+    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
     generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")
 
-    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
+    # ----- Stencil 3D - created by using kernel_decorator with assignments in '@=' format -----
     src, dst = ps.fields("src, src_tmp: [3D]")
 
     @ps.kernel
     def kernel_func():
-        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
-                         src[0, 1, 0] + src[0, -1, 0] +
-                         src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
+        dst[0, 0, 0] @= (3 * src[1, 0, 0] + 4 * src[-1, 0, 0] +
+                         5 * src[0, 1, 0] + 6 * src[0, -1, 0] +
+                         7 * src[0, 0, 1] + 8 * src[0, 0, -1]) / 33
 
     generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
diff --git a/tests/cuda/codegen/CudaPoisson.py b/tests/cuda/codegen/CudaPoisson.py
new file mode 100644
index 0000000000000000000000000000000000000000..011b41494099e691642bcf3788ec8aef401a17bb
--- /dev/null
+++ b/tests/cuda/codegen/CudaPoisson.py
@@ -0,0 +1,19 @@
+import numpy as np
+import sympy as sp
+import pystencils as ps
+from pystencils_walberla import CodeGeneration, generate_sweep
+
+
+with CodeGeneration() as ctx:
+    # ----- Solving the 2D Poisson equation with rhs --------------------------
+    dx = sp.Symbol("dx")
+    dy = sp.Symbol("dy")
+    src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
+
+    @ps.kernel
+    def kernel_func():
+        dst[0, 0] @= ((dx**2 * (src[1, 0] + src[-1, 0])) +
+                      (dy**2 * (src[0, 1] + src[0, -1])) -
+                      (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
+
+    generate_sweep(ctx, 'PoissonGPU', kernel_func, field_swaps=[(src, dst)], target='gpu')
diff --git a/tests/field/codegen/CodegenPoisson.cpp b/tests/field/codegen/CodegenPoisson.cpp
index c390831b649d68a075eb539333fdd8cfe3b4236f..52b93bbbdeab9ecb6f735e2732da678b8c249372 100644
--- a/tests/field/codegen/CodegenPoisson.cpp
+++ b/tests/field/codegen/CodegenPoisson.cpp
@@ -20,65 +20,102 @@
 
 #include "Poisson.h"
 #include "blockforest/Initialization.h"
-#include "blockforest/communication/UniformDirectScheme.h"
 #include "blockforest/communication/UniformBufferedScheme.h"
 
 #include "core/Environment.h"
 #include "core/debug/TestSubsystem.h"
+#include "core/math/Constants.h"
 
 #include "field/AddToStorage.h"
 #include "field/communication/PackInfo.h"
+#include "field/vtk/VTKWriter.h"
 
 #include "gui/Gui.h"
 
 #include "stencil/D2Q9.h"
-#include "stencil/D3Q7.h"
-
 #include "timeloop/SweepTimeloop.h"
 
+#include "vtk/VTKOutput.h"
+
 
 using namespace walberla;
 
-typedef GhostLayerField<double,1> ScalarField;
+typedef GhostLayerField<real_t, 1> ScalarField_T;
+
+// U with Dirichlet Boundary
+void initU( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & srcId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      if( blocks->atDomainYMaxBorder( *block ) )
+      {
+         ScalarField_T * src = block->getData< ScalarField_T >( srcId );
+         CellInterval xyz = src->xyzSizeWithGhostLayer();
+         xyz.yMin() = xyz.yMax();
+         for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+         {
+            const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+            src->get( *cell ) = std::sin( math::pi * p[0] ) * std::sinh( math::pi * p[1] );
+         }
+      }
+   }
+}
 
+// right hand side
+void initF( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID & fId )
+{
+   for( auto block = blocks->begin(); block != blocks->end(); ++block )
+   {
+      ScalarField_T * f = block->getData< ScalarField_T >( fId );
+      CellInterval xyz = f->xyzSize();
+      for( auto cell = xyz.begin(); cell != xyz.end(); ++cell )
+      {
+         const Vector3< real_t > p = blocks->getBlockLocalCellCenter( *block, *cell );
+         f->get( *cell ) = real_t(4) * math::pi * math::pi * std::sin( real_t(2) * math::pi * p[0] ) * std::sinh( real_t(2) * math::pi * p[1] );
+      }
+   }
+}
 
 void testPoisson()
 {
-   uint_t L0 = 50;
-   uint_t xSize = L0;
-   uint_t ySize = L0;
 
-   double h = 1 / (double(L0) + 1);
-   double rhs = 1.0;
+   const uint_t xCells = uint_t(200);
+   const uint_t yCells = uint_t(100);
+   const real_t xSize = real_t(2);
+   const real_t ySize = real_t(1);
+   const real_t dx = xSize / real_c( xCells + uint_t(1) );
+   const real_t dy = ySize / real_c( yCells + uint_t(1) );
+
    // Create blocks
    shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid (
+           math::AABB( real_t(0.5) * dx, real_t(0.5) * dy, real_t(0),
+                       xSize - real_t(0.5) * dx, ySize - real_t(0.5) * dy, dx ),
            uint_t(1) , uint_t(1),  uint_t(1),  // number of blocks in x,y,z direction
-           xSize, ySize, uint_t(1),            // how many cells per block (x,y,z)
-           real_t(1),                          // dx: length of one cell in physical coordinates
+           xCells, yCells, uint_t(1),          // how many cells per block (x,y,z)
            false,                              // one block per process - "false" means all blocks to one process
            false, false, false );              // no periodicity
 
 
-   BlockDataID fieldID = field::addToStorage<ScalarField>(blocks, "Field", real_t(0.0));
+   BlockDataID fieldID = field::addToStorage<ScalarField_T>(blocks, "Field", real_t(0.0));
+   initU( blocks, fieldID );
+
+   BlockDataID fId = field::addToStorage< ScalarField_T >( blocks, "f", real_t(0.0));
+   initF( blocks, fId );
 
    typedef blockforest::communication::UniformBufferedScheme<stencil::D2Q9> CommScheme;
-   typedef field::communication::PackInfo<ScalarField> Packing;
+   typedef field::communication::PackInfo<ScalarField_T> Packing;
    CommScheme commScheme(blocks);
    commScheme.addDataToCommunicate( make_shared<Packing>(fieldID) );
 
    // Create Timeloop
-   const uint_t numberOfTimesteps = uint_t(2500);
+   const uint_t numberOfTimesteps = uint_t(10000);
    SweepTimeloop timeloop ( blocks, numberOfTimesteps );
 
    // Registering the sweep
    timeloop.add() << BeforeFunction(  commScheme, "Communication" )
-                  << Sweep( pystencils::Poisson(fieldID, h, rhs), "Poisson Kernel" );
+                  << Sweep( pystencils::Poisson(fId, fieldID, dx, dy), "Poisson Kernel" );
 
    timeloop.run();
-
-   auto firstBlock = blocks->begin();
-   auto f = firstBlock->getData<ScalarField>( fieldID );
-   WALBERLA_CHECK_FLOAT_EQUAL(f->get(int(L0/2), int(L0/2), 0), real_t(7.28886456002774547e-02));
 }
 
 
@@ -89,5 +126,5 @@ int main( int argc, char ** argv )
 
    testPoisson();
 
-   return 0;
+   return EXIT_SUCCESS;
 }
diff --git a/tests/field/codegen/JacobiKernel.py b/tests/field/codegen/JacobiKernel.py
index c135ded28942cd897e39faed21b259b8073e2adb..3421c8cdd3fc6a0fb88e92ab479fbb2b768a7648 100644
--- a/tests/field/codegen/JacobiKernel.py
+++ b/tests/field/codegen/JacobiKernel.py
@@ -6,9 +6,9 @@ from pystencils_walberla import CodeGeneration, generate_sweep
 with CodeGeneration() as ctx:
     # ----- Stencil 2D - created by specifying weights in nested list --------------------------
     src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
-    stencil = [[1, 2, 3],
-               [4, 5, 6],
-               [7, 8, 9]]
+    stencil = [[1.11, 2.22, 3.33],
+               [4.44, 5.55, 6.66],
+               [7.77, 8.88, 9.99]]
     assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=1 / np.sum(stencil))
     generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])
 
diff --git a/tests/field/codegen/Poisson.py b/tests/field/codegen/Poisson.py
index 9cf604064eac2e8feff950e841642f9cf9b31e3d..df3e9589e7ddb7d6a8101985f76fcd4ab8fddd2d 100644
--- a/tests/field/codegen/Poisson.py
+++ b/tests/field/codegen/Poisson.py
@@ -4,13 +4,15 @@ from pystencils_walberla import CodeGeneration, generate_sweep
 
 
 with CodeGeneration() as ctx:
-    # ----- Solving the 2D Poisson equation with constant rhs --------------------------
-    rhs = sp.Symbol("rhs")
-    h = sp.Symbol("h")
-    src, dst = ps.fields("src, src_tmp: [2D]", layout='fzyx')
+    # ----- Solving the 2D Poisson equation with rhs --------------------------
+    dx = sp.Symbol("dx")
+    dy = sp.Symbol("dy")
+    src, dst, rhs = ps.fields("src, src_tmp, rhs: [2D]", layout='fzyx')
 
     @ps.kernel
     def kernel_func():
-        dst[0, 0] @= (src[1, 0] + src[-1, 0] +
-                      src[0, 1] + src[0, -1]) / 4 + ((rhs * h**2) / 4)
+        dst[0, 0] @= ((dy**2 * (src[1, 0] + src[-1, 0])) +
+                      (dx**2 * (src[0, 1] + src[0, -1])) -
+                      (rhs[0, 0] * dx**2 * dy**2)) / (2 * (dx**2 + dy**2))
+
     generate_sweep(ctx, 'Poisson', kernel_func, field_swaps=[(src, dst)])