diff --git a/lbmpy_tests/test_poisuille_channel.py b/lbmpy_tests/test_poisuille_channel.py
index 43a761daf130d7df36a9fac84b8c4b897aee6b8c..db8730b608a67297a4746ab92aa1bc948dd13f70 100644
--- a/lbmpy_tests/test_poisuille_channel.py
+++ b/lbmpy_tests/test_poisuille_channel.py
@@ -1,27 +1,52 @@
 """
-This test revealed a problem with OpenCL (https://i10git.cs.fau.de/pycodegen/lbmpy/issues/9#note_9521)
+Test Poiseuille flow against analytical solution
 """
 
 
 import numpy as np
 import pytest
+import sympy as sp
 
 import lbmpy
 from lbmpy.boundaries import NoSlip
 from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
+from lbmpy.creationfunctions import create_lb_update_rule, create_stream_pull_with_output_kernel
 from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
-from lbmpy.session import *
-from pystencils.session import *
+from lbmpy.stencils import get_stencil
 
-previous_vmids = {}
-previous_endresults = {}
+import pystencils as ps
+
+
+def poiseuille_flow(z, H, ext_force_density, dyn_visc):
+    """
+    Analytical solution for plane Poiseuille flow.
+
+    Parameters
+    ----------
+    z : :obj:`float`
+        Distance to the mid plane of the channel.
+    H : :obj:`float`
+        Distance between the boundaries.
+    ext_force_density : :obj:`float`
+        Force density on the fluid normal to the boundaries.
+    dyn_visc : :obj:`float`
+        Dynamic viscosity of the LB fluid.
+
+    """
+    return ext_force_density * 1. / (2 * dyn_visc) * (H**2.0 / 4.0 - z**2.0)
+
+
+rho_0 = 1.2  # density
+eta = 0.2  # kinematic viscosity
+width = 41  # of box
+actual_width = width - 2  # subtract boundary layer from box width
+ext_force_density = 0.2 / actual_width**2  # scale by width to keep stable
 
 
 @pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
-def test_poiseuille_channel(target):
-    # # Lattice Boltzmann
-    #
-    # ## Definitions
+@pytest.mark.parametrize('stencil_name', ("D2Q9", "D3Q19",))
+def test_poiseuille_channel(target, stencil_name):
+    # OpenCL and Cuda
     if target == 'opencl':
         import pytest
         pytest.importorskip("pyopencl")
@@ -30,65 +55,72 @@ def test_poiseuille_channel(target):
         import pytest
         pytest.importorskip("pycuda")
 
-    # In[2]:
+    # LB parameters
+    lb_stencil = get_stencil(stencil_name)
+    dim = len(lb_stencil[0])
 
-    L = (34, 34)
+    if dim == 2:
+        L = [4, width]
+    elif dim == 3:
+        L = [4, width, 4]
+    else:
+        raise Exception()
+    periodicity = [True, False] + [True] * (dim - 2)
 
-    lb_stencil = get_stencil("D2Q9")
-    eta = 1
     omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)
 
-    f_pre = 0.00001
-
     # ## Data structures
-
-    # In[3]:
-
-    dh = ps.create_data_handling(L, periodicity=(True, False), default_target=target)
-
-    opts = {'cpu_openmp': True,
-            'cpu_vectorize_info': None,
-            'target': dh.default_target}
+    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)
 
     src = dh.add_array('src', values_per_cell=len(lb_stencil))
     dst = dh.add_array_like('dst', 'src')
     ρ = dh.add_array('rho', latex_name='\\rho')
     u = dh.add_array('u', values_per_cell=dh.dim)
 
-    # In[4]:
-
+    # LB Setup
     collision = create_lb_update_rule(stencil=lb_stencil,
                                       relaxation_rate=omega,
+                                      method="trt",
                                       compressible=True,
-                                      force_model='guo',
-                                      force=sp.Matrix([f_pre, 0]),
+                                      force_model="schiller",
+                                      force=sp.Matrix([ext_force_density] + [0] * (dim - 1)),
                                       kernel_type='collide_only',
                                       optimization={'symbolic_field': src})
 
     stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})
 
-    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)
+    opts = {'cpu_openmp': False,
+            'cpu_vectorize_info': None,
+            'target': dh.default_target}
 
     stream_kernel = ps.create_kernel(stream, **opts).compile()
     collision_kernel = ps.create_kernel(collision, **opts).compile()
 
-    # ## Set up the simulation
+    # Boundaries
+    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)
 
-    # In[5]:
+    # ## Set up the simulation
 
-    init = macroscopic_values_setter(collision.method, velocity=(0,)*dh.dim,
+    init = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                      pdfs=src.center_vector, density=ρ.center)
     init_kernel = ps.create_kernel(init, ghost_layers=0).compile()
 
     noslip = NoSlip()
-    lbbh.set_boundary(noslip, make_slice[:, :4])
-    lbbh.set_boundary(noslip, make_slice[:, -4:])
+    wall_thickness = 2
+    if dim == 2:
+        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness])
+        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:])
+    elif dim == 3:
+        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness, :])
+        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:, :])
+    else:
+        raise Exception()
 
     for bh in lbbh, :
         assert len(bh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"
 
     def init():
-        dh.fill(ρ.name, 1)
+        dh.fill(ρ.name, rho_0)
         dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
         dh.fill(u.name, 0)
 
@@ -98,10 +130,11 @@ def test_poiseuille_channel(target):
 
     sync_pdfs = dh.synchronization_function([src.name])
 
+    # Time loop
     def time_loop(steps):
         dh.all_to_gpu()
-        vmid = np.empty((2, steps//10+1))
         i = -1
+        last_max_vel = -1
         for i in range(steps):
             dh.run_kernel(collision_kernel)
             sync_pdfs()
@@ -110,71 +143,41 @@ def test_poiseuille_channel(target):
 
             dh.swap(src.name, dst.name)
 
-            if i % 10 == 0:
+            # Consider early termination
+            if i % 100 == 0:
                 if u.name in dh.gpu_arrays:
                     dh.to_cpu(u.name)
                 uu = dh.gather_array(u.name)
-                uu = uu[L[0]//2-1:L[0]//2+1, L[1]//2-1:L[1]//2+1, 0].mean()
-                vmid[:, i//10] = [i, uu]
-                if 1/np.sqrt(3) < uu:
-                    break
-        if dh.is_on_gpu(u.name):
-            dh.to_cpu(u.name)
-
-        return vmid[:, :i//10+1]
+                # average periodic directions
+                if dim == 3:  # dont' swap order
+                    uu = np.average(uu, axis=2)
+                uu = np.average(uu, axis=0)
 
-    # In[7]:
-
-    # def plot():
-
-        # plt.subplot(2, 2, 1)
-        # plt.title("$u$")
-        # plt.xlabel("$x$")
-        # plt.ylabel("$y$")
-        # plt.vector_field_magnitude(uu)
-        # plt.colorbar()
-
-        # plt.subplot(2, 2, 2)
-        # plt.title("$u$")
-        # plt.xlabel("$x/2$")
-        # plt.ylabel("$y/2$")
-        # plt.vector_field(uu, step=2)
-
-        # actualwidth = np.sum(1-np.isnan(uu[0, :, 0]))
-        # uref = f_pre*actualwidth**2/(8*(eta))
-
-        # plt.subplot(2, 2, 3)
-        # plt.title("flow profile")
-        # plt.xlabel("$y$")
-        # plt.ylabel(r"$u_x$")
-        # plt.plot((uu[L[0]//2-1, :, 0]+uu[L[0]//2, :, 0])/2)
+                max_vel = np.nanmax(uu)
+                if np.abs(max_vel / last_max_vel - 1) < 5E-6:
+                    break
+                last_max_vel = max_vel
 
-        # plt.subplot(2, 2, 4)
-        # plt.title("convergence")
-        # plt.xlabel("$t$")
-        # plt.plot()
+        # cut off wall regions
+        uu = uu[wall_thickness:-wall_thickness]
 
-    # ## Run the simulation
+        # correct for f/2 term
+        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (dim - 1))
 
-    # In[8]:
+        return uu
 
     init()
-    vmid = time_loop(1000)
-    # plot()
-
-    uu = dh.gather_array(u.name)
+    # Simulation
+    profile = time_loop(5000)
 
-    for target, prev_endresult in previous_endresults.items():
-        assert np.allclose(uu[4:-4, 4:-4, 0], prev_endresult[4:-4, 4:-4, 0],
-                           atol=1e-5), f'uu does not agree with result from {target}'
+    # compare against analytical solution
+    # The profile is of shape (n,3). Force is in x-direction
+    y = np.arange(len(profile[:, 0]))
+    mid = (y[-1] - y[0]) / 2  # Mid point of channel
 
-    for target, prev_vmid in previous_vmids.items():
-        assert np.allclose(vmid, prev_vmid, atol=1e-5), f'vmid does not agree with result from {target}'
+    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)
 
-    # uref = f_pre*actualwidth**2/(8*(eta))
-    # actualwidth = np.sum(1-np.isnan(uu[0, :, 0]))
-    # assert np.allclose(vmid[1, -1]/uref, 1, atol=1e-3)
-    # print(vmid[1, -1])
+    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)
 
-    previous_vmids[target] = vmid
-    previous_endresults[target] = uu
+    # Test zero vel in other directions
+    np.testing.assert_allclose(profile[:, 1:], np.zeros_like(profile[:, 1:]), atol=1E-9)