diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 57fa584717c607cae35f6dbdbd967d1b641486a5..411a9bfe9db080b1196e2bf1583a7c51d478f941 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -420,7 +420,7 @@ gcc_8_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -444,7 +444,7 @@ gcc_8_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -466,7 +466,7 @@ gcc_8_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -487,7 +487,7 @@ gcc_8_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -512,7 +512,7 @@ gcc_8_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -535,7 +535,7 @@ gcc_8_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -557,7 +557,7 @@ gcc_8_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -582,7 +582,7 @@ gcc_9_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -606,7 +606,7 @@ gcc_9_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -628,7 +628,7 @@ gcc_9_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -649,7 +649,7 @@ gcc_9_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -674,7 +674,7 @@ gcc_9_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -697,7 +697,7 @@ gcc_9_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -719,7 +719,7 @@ gcc_9_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -744,7 +744,7 @@ gcc_10_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -768,7 +768,7 @@ gcc_10_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -790,7 +790,7 @@ gcc_10_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -811,7 +811,7 @@ gcc_10_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -836,7 +836,7 @@ gcc_10_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -859,7 +859,7 @@ gcc_10_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -881,7 +881,7 @@ gcc_10_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -906,7 +906,7 @@ gcc_11_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -930,7 +930,7 @@ gcc_11_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -952,7 +952,7 @@ gcc_11_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -973,7 +973,7 @@ gcc_11_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -998,7 +998,7 @@ gcc_11_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1021,7 +1021,7 @@ gcc_11_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1043,7 +1043,7 @@ gcc_11_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1348,7 +1348,7 @@ clang_11.0_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1372,7 +1372,7 @@ clang_11.0_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -1394,7 +1394,7 @@ clang_11.0_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -1415,7 +1415,7 @@ clang_11.0_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1440,7 +1440,7 @@ clang_11.0_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1463,7 +1463,7 @@ clang_11.0_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1485,7 +1485,7 @@ clang_11.0_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1510,7 +1510,7 @@ clang_12.0_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1534,7 +1534,7 @@ clang_12.0_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -1556,7 +1556,7 @@ clang_12.0_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -1577,7 +1577,7 @@ clang_12.0_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1602,7 +1602,7 @@ clang_12.0_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1625,7 +1625,7 @@ clang_12.0_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1647,7 +1647,7 @@ clang_12.0_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1672,7 +1672,7 @@ clang_13.0_serial:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1696,7 +1696,7 @@ clang_13.0_mpionly:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_OPENMP: "OFF"
@@ -1718,7 +1718,7 @@ clang_13.0_hybrid:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
@@ -1736,7 +1736,7 @@ clang_13.0_serial_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_BUILD_WITH_MPI: "OFF"
@@ -1758,7 +1758,7 @@ clang_13.0_mpionly_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1778,7 +1778,7 @@ clang_13.0_hybrid_dbg:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
@@ -1798,7 +1798,7 @@ clang_13.0_hybrid_dbg_sp:
       - python3 -m pytest --junitxml=report.xml pystencils_walberla lbmpy_walberla
       - pip3 list
       - cd ..
-      - CC=gcc CXX=g++ pip3 install pycuda
+      - CC=gcc CXX=g++ pip3 install cupy-cuda11x
    variables:
       WALBERLA_BUILD_WITH_CUDA: "ON"
       CMAKE_BUILD_TYPE: "DebugOptimized"
diff --git a/python/waLBerla/__init__.py b/python/waLBerla/__init__.py
index 6e1e3ebabb56ba1e2f4307ef4c30876d921c3217..a99420105c1b21eed67f8ede0e4a463c0418dcc3 100644
--- a/python/waLBerla/__init__.py
+++ b/python/waLBerla/__init__.py
@@ -42,10 +42,10 @@ if cpp_available:
         from .field_extension import extend as extend_field
         extend_field(field)  # noqa: F405
 
-    if 'cuda' in globals():
-        sys.modules[__name__ + '.cuda'] = cuda  # noqa: F405
-        from .cuda_extension import extend as extend_cuda
-        extend_cuda(cuda)  # noqa: F405
+    if 'gpu' in globals():
+        sys.modules[__name__ + '.gpu'] = gpu  # noqa: F405
+        from .gpu_extension import extend as extend_gpu
+        extend_gpu(gpu)  # noqa: F405
     if 'mpi' in globals():
         sys.modules[__name__ + '.mpi'] = mpi  # noqa: F405
 else:
diff --git a/python/waLBerla/cuda_extension.py b/python/waLBerla/cuda_extension.py
deleted file mode 100644
index c7e74a528fff3605d05096908fd679afd74595e9..0000000000000000000000000000000000000000
--- a/python/waLBerla/cuda_extension.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from pycuda.gpuarray import GPUArray
-import numpy as np
-from .field_extension import normalize_ghostlayer_info
-
-
-def to_gpu_array(f, with_ghost_layers=True):
-    """Converts a waLBerla GPUField to a pycuda GPUArray"""
-    if not f:
-        return None
-    dtype = np.dtype(f.dtypeStr)
-    strides = [dtype.itemsize * a for a in f.strides]
-    res = GPUArray(f.sizeWithGhostLayers, dtype, gpudata=f.ptr, strides=strides)
-    if with_ghost_layers is True:
-        return res
-
-    ghost_layers = normalize_ghostlayer_info(f, with_ghost_layers)
-    cutoff = [f.nrOfGhostLayers - gl for gl in ghost_layers]
-    res = res[cutoff[0]:-cutoff[0] if cutoff[0] > 0 else None,
-              cutoff[1]:-cutoff[1] if cutoff[1] > 0 else None,
-              cutoff[2]:-cutoff[2] if cutoff[2] > 0 else None,
-              :]
-    return res
-
-
-def extend(cpp_cuda_module):
-    cpp_cuda_module.toGpuArray = to_gpu_array
diff --git a/python/waLBerla/gpu_extension.py b/python/waLBerla/gpu_extension.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d80b38c992b892910ba15292fc137732d641952
--- /dev/null
+++ b/python/waLBerla/gpu_extension.py
@@ -0,0 +1,33 @@
+import cupy as cp
+from cupy.cuda import MemoryPointer, UnownedMemory
+import numpy as np
+from .field_extension import normalize_ghostlayer_info
+
+
+def to_gpu_array(f, with_ghost_layers=True):
+    """Wraps the device memory of a waLBerla GPUField in a cupy ndarray without copying it.
+
+    A falsy field yields None. with_ghost_layers=True returns the full array; any other value is
+    resolved by normalize_ghostlayer_info and the surplus layers are sliced off axes 0, 1 and 2.
+    """
+    if not f:
+        return None
+    element_type = np.dtype(f.dtypeStr)
+    byte_strides = [element_type.itemsize * s for s in f.strides]  # cupy expects byte strides
+    # UnownedMemory: cupy never frees this buffer; f is passed along as the owner reference
+    n_bytes = np.prod(f.allocSize) * element_type.itemsize
+    device_mem = MemoryPointer(UnownedMemory(f.ptr, n_bytes, f), 0)
+    full = cp.ndarray(shape=f.sizeWithGhostLayers, dtype=element_type,
+                      memptr=device_mem, strides=byte_strides)
+    if with_ghost_layers is True:
+        return full
+
+    kept_layers = normalize_ghostlayer_info(f, with_ghost_layers)
+    cutoff = [f.nrOfGhostLayers - gl for gl in kept_layers]
+    trimmed = [slice(c, -c, 1) if c > 0 else slice(None, None, None)
+               for c in (cutoff[0], cutoff[1], cutoff[2])]
+    return full[tuple(trimmed) + (slice(None, None, None),)]
+
+
+def extend(cpp_gpu_module):
+    cpp_gpu_module.toGpuArray = to_gpu_array  # attach the cupy converter to the given module object
diff --git a/src/python_coupling/export/GPUExport.impl.h b/src/python_coupling/export/GPUExport.impl.h
index cffbc245e985ba208b50569a2bfc3125f61c0e6a..4fb0671702715b6104d8ffb8437d151dc3ca66b1 100644
--- a/src/python_coupling/export/GPUExport.impl.h
+++ b/src/python_coupling/export/GPUExport.impl.h
@@ -52,7 +52,6 @@ using namespace pybind11::literals;
    uint64_t gpufield_ptr(const GpuField_T & gpuField)
    {
       return reinterpret_cast<uint64_t>(gpuField.pitchedPtr().ptr);
-      // return gpuField.pitchedPtr();
    }
 
    template<typename GpuField_T>
@@ -115,7 +114,7 @@ using namespace pybind11::literals;
                             py::object &dtype, uint_t fs, uint_t gl, Layout layout,
                             bool usePitchedMem )
          : blocks_( blocks ), name_( name ), dtype_(dtype), fs_( fs ),
-           gl_(gl),layout_( layout), usePitchedMem_(usePitchedMem), found_(true)
+           gl_(gl),layout_( layout), usePitchedMem_(usePitchedMem)
       {}
 
       template< typename GpuField_T>
@@ -138,7 +137,7 @@ using namespace pybind11::literals;
       uint_t gl_;
       Layout layout_;
       bool usePitchedMem_;
-      bool found_;
+      bool found_{true};
    };
 
    template<typename... GpuFields>