From 116b46aab39da6f9fcf76fa78461b58b3dcd44a7 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 22 Nov 2021 22:30:23 +0000
Subject: [PATCH] Remove OpenCL support

---
 CHANGELOG.md                                  |  6 ++++
 lbmpy/advanced_streaming/communication.py     | 23 +++----------
 lbmpy/lbstep.py                               |  2 +-
 .../test_fully_periodic_flow.py               | 21 +++---------
 .../test_periodic_pipe_with_force.py          | 10 +-----
 lbmpy_tests/test_boundary_handling.py         | 12 +++----
 lbmpy_tests/test_code_hashequivalence.py      | 22 ------------
 lbmpy_tests/test_lbstep.py                    | 34 -------------------
 lbmpy_tests/test_poisuille_channel.py         | 10 ++----
 lbmpy_tests/test_shear_flow.py                |  9 ++---
 10 files changed, 27 insertions(+), 122 deletions(-)
 create mode 100644 CHANGELOG.md
 delete mode 100644 lbmpy_tests/test_code_hashequivalence.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..f0202916
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,6 @@
+# Change Log
+
+## Unreleased
+
+### Removed
+* Removed OpenCL support because pystencils no longer supports it
diff --git a/lbmpy/advanced_streaming/communication.py b/lbmpy/advanced_streaming/communication.py
index 5d67f68d..2345b32a 100644
--- a/lbmpy/advanced_streaming/communication.py
+++ b/lbmpy/advanced_streaming/communication.py
@@ -104,8 +104,7 @@ def get_communication_slices(
 
 
 def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
-                             domain_size=None, target=Target.GPU,
-                             opencl_queue=None, opencl_ctx=None):
+                             domain_size=None, target=Target.GPU):
     """Copies a rectangular array slice onto another non-overlapping array slice"""
     from pystencils.gpucuda.kernelcreation import create_cuda_kernel
 
@@ -136,9 +135,6 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
     if target == Target.GPU:
         from pystencils.gpucuda import make_python_function
         return make_python_function(ast)
-    elif target == Target.OPENCL:
-        from pystencils.opencl import make_python_function
-        return make_python_function(ast, opencl_queue, opencl_ctx)
     else:
         raise ValueError('Invalid target:', target)
 
@@ -147,22 +143,17 @@ class LBMPeriodicityHandling:
 
     def __init__(self, stencil, data_handling, pdf_field_name,
                  streaming_pattern='pull', ghost_layers=1,
-                 opencl_queue=None, opencl_ctx=None,
                  pycuda_direct_copy=True):
         """
             Periodicity Handling for Lattice Boltzmann Streaming.
 
-            **On the usage with cuda/opencl:** 
+            **On the usage with cuda:**
             - pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
             e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
             handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
             compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
             but especially for large stencils like D3Q27, their compilation can take up to 20 seconds. 
             Choose your weapon depending on your use case.
-
-            - pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled
-            copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears
-            to be about four times faster.
         """
         if not isinstance(data_handling, SerialDataHandling):
             raise ValueError('Only serial data handling is supported!')
@@ -172,7 +163,7 @@ class LBMPeriodicityHandling:
         self.dh = data_handling
 
         target = data_handling.default_target
-        assert target in [Target.CPU, Target.GPU, Target.OPENCL]
+        assert target in [Target.CPU, Target.GPU]
 
         self.pdf_field_name = pdf_field_name
         self.ghost_layers = ghost_layers
@@ -180,8 +171,6 @@ class LBMPeriodicityHandling:
         self.inplace_pattern = is_inplace(streaming_pattern)
         self.target = target
         self.cpu = target == Target.CPU
-        self.opencl_queue = opencl_queue
-        self.opencl_ctx = opencl_ctx
         self.pycuda_direct_copy = target == Target.GPU and pycuda_direct_copy
 
         def is_copy_direction(direction):
@@ -205,7 +194,7 @@ class LBMPeriodicityHandling:
                                                            ghost_layers=ghost_layers)
             self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
 
-        if target == Target.OPENCL or (target == Target.GPU and not pycuda_direct_copy):
+        if target == Target.GPU and not pycuda_direct_copy:
             self.device_copy_kernels = []
             for timestep in timesteps:
                 self.device_copy_kernels.append(self._compile_copy_kernels(timestep))
@@ -227,9 +216,7 @@ class LBMPeriodicityHandling:
         kernels = []
         for src, dst in self.comm_slices[timestep.idx]:
             kernels.append(
-                periodic_pdf_copy_kernel(
-                    pdf_field, src, dst, target=self.target,
-                    opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
+                periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
         return kernels
 
     def _periodicity_handling_gpu(self, prev_timestep):
diff --git a/lbmpy/lbstep.py b/lbmpy/lbstep.py
index 15e73fe6..44aea72a 100644
--- a/lbmpy/lbstep.py
+++ b/lbmpy/lbstep.py
@@ -74,7 +74,7 @@ class LatticeBoltzmannStep:
         self.density_data_name = name + "_density" if density_data_name is None else density_data_name
         self.density_data_index = density_data_index
 
-        self._gpu = target == Target.GPU or target == Target.OPENCL
+        self._gpu = target == Target.GPU
         layout = lbm_optimisation.field_layout
 
         alignment = False
diff --git a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
index 9804a1e3..0c37cfc3 100644
--- a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
+++ b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py
@@ -25,27 +25,15 @@ try:
 except Exception:
     pass
 
-try:
-    import pystencils.opencl.autoinit
-    from pystencils.opencl.opencljit import get_global_cl_queue
-    if get_global_cl_queue() is not None:
-        targets += [Target.OPENCL]
-except Exception:
-    pass
-
 
 @pytest.mark.parametrize('target', targets)
 @pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
 @pytest.mark.parametrize('streaming_pattern', streaming_patterns)
 @pytest.mark.longrun
 def test_fully_periodic_flow(target, stencil, streaming_pattern):
-
-    if target == Target.OPENCL:
-        opencl_queue = get_global_cl_queue()
-    else:
-        opencl_queue = None
-
-    gpu = target in [Target.GPU, Target.OPENCL]
+    gpu = False
+    if target == Target.GPU:
+        gpu = True
 
     #   Stencil
     stencil = LBStencil(stencil)
@@ -59,8 +47,7 @@ def test_fully_periodic_flow(target, stencil, streaming_pattern):
     domain_size = (30,) * stencil.D
     periodicity = (True,) * stencil.D
 
-    dh = create_data_handling(domain_size=domain_size, periodicity=periodicity,
-                              default_target=target, opencl_queue=opencl_queue)
+    dh = create_data_handling(domain_size=domain_size, periodicity=periodicity, default_target=target)
 
     pdfs = dh.add_array('pdfs', stencil.Q)
     if not inplace:
diff --git a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
index 7ee8a608..42b6671a 100644
--- a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
+++ b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py
@@ -26,14 +26,6 @@ try:
 except Exception:
     pass
 
-try:
-    import pystencils.opencl.autoinit
-    from pystencils.opencl.opencljit import get_global_cl_queue
-    if get_global_cl_queue() is not None:
-        targets += [Target.OPENCL]
-except Exception:
-    pass
-
 
 class PeriodicPipeFlow:
     def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU):
@@ -42,7 +34,7 @@ class PeriodicPipeFlow:
             wall_boundary = NoSlip()
 
         self.target = target
-        self.gpu = target in [Target.GPU, Target.OPENCL]
+        self.gpu = target in [Target.GPU]
 
         #   Stencil
         self.stencil = stencil
diff --git a/lbmpy_tests/test_boundary_handling.py b/lbmpy_tests/test_boundary_handling.py
index fa285f6b..a60c90fb 100644
--- a/lbmpy_tests/test_boundary_handling.py
+++ b/lbmpy_tests/test_boundary_handling.py
@@ -4,7 +4,7 @@ import pytest
 from lbmpy.boundaries import NoSlip, UBB, SimpleExtrapolationOutflow, ExtrapolationOutflow, \
     FixedDensity, DiffusionDirichlet, NeumannByCopy, StreamInConstant, FreeSlip
 from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
-from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig, LBMOptimisation
+from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig
 from lbmpy.enums import Stencil, Method
 from lbmpy.geometry import add_box_boundary
 from lbmpy.lbstep import LatticeBoltzmannStep
@@ -22,22 +22,18 @@ def mirror_stencil(direction, mirror_axis):
     return tuple(direction)
 
 
-@pytest.mark.parametrize("target", [Target.GPU, Target.CPU, Target.OPENCL])
+@pytest.mark.parametrize("target", [Target.GPU, Target.CPU])
 def test_simple(target):
     if target == Target.GPU:
         import pytest
         pytest.importorskip('pycuda')
-    elif target == Target.OPENCL:
-        import pytest
-        pytest.importorskip('pyopencl')
-        import pystencils.opencl.autoinit
 
     dh = create_data_handling((4, 4), parallel=False, default_target=target)
     dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU)
     for i in range(9):
         dh.fill("pdfs", i, value_idx=i, ghost_layers=True)
 
-    if target == Target.GPU or target == Target.OPENCL:
+    if target == Target.GPU:
         dh.all_to_gpu()
 
     lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), compressible=False, relaxation_rate=1.8)
@@ -57,7 +53,7 @@ def test_simple(target):
     bh.prepare()
     bh()
 
-    if target == Target.GPU or target == Target.OPENCL:
+    if target == Target.GPU:
         dh.all_to_cpu()
     # left lower corner
     assert (dh.cpu_arrays['pdfs'][0, 0, 6] == 7)
diff --git a/lbmpy_tests/test_code_hashequivalence.py b/lbmpy_tests/test_code_hashequivalence.py
deleted file mode 100644
index 06ef0ded..00000000
--- a/lbmpy_tests/test_code_hashequivalence.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from hashlib import sha256
-
-from pystencils import Backend, CreateKernelConfig, Target
-from lbmpy.creationfunctions import create_lb_ast
-from lbmpy.enums import Stencil, Method
-from lbmpy.creationfunctions import LBMConfig
-from lbmpy.stencils import LBStencil
-
-
-def test_hash_equivalence_llvm():
-    import pytest
-    pytest.importorskip("llvmlite")
-    from pystencils.llvm.llvmjit import generate_llvm
-
-    ref_value = "f1b1879e304fe8533977c885f2744516dd4964064a7e4ae64fd94b8426d995bb"
-
-    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), method=Method.SRT)
-    config = CreateKernelConfig(target=Target.CPU, backend=Backend.LLVM)
-    ast = create_lb_ast(lbm_config=lbm_config, config=config)
-    code = generate_llvm(ast)
-    hash_value = sha256(str(code).encode()).hexdigest()
-    assert hash_value == ref_value
diff --git a/lbmpy_tests/test_lbstep.py b/lbmpy_tests/test_lbstep.py
index e6a6bc63..86131385 100644
--- a/lbmpy_tests/test_lbstep.py
+++ b/lbmpy_tests/test_lbstep.py
@@ -50,40 +50,6 @@ def test_data_handling_3d():
         np.testing.assert_almost_equal(results[0], arr)
 
 
-def test_data_handling_2d_opencl():
-    pytest.importorskip('pyopencl')
-    import pystencils.opencl.opencljit
-    pystencils.opencl.opencljit.init_globally()
-    print("--- LDC 2D test ---")
-    results = []
-
-    # Since waLBerla has no OpenCL Backend yet, it is not possible to use the
-    # parallel Datahandling with OpenCL at the moment
-
-    # TODO: Activate parallel Datahandling if Backend is available
-    parallel = False
-    for gpu in [True, False] if gpu_available else [False]:
-        if parallel and gpu and not hasattr(wLB, 'cuda'):
-            continue
-
-        print(f"Testing parallel: {parallel}\tgpu: {gpu}")
-        config = CreateKernelConfig(target=Target.GPU if gpu else Target.CPU,
-                                    gpu_indexing_params=MappingProxyType({'block_size': (8, 4, 2)}))
-        if parallel:
-            from pystencils.datahandling import ParallelDataHandling
-            blocks = wLB.createUniformBlockGrid(blocks=(2, 3, 1), cellsPerBlock=(5, 5, 1),
-                                                oneBlockPerProcess=False)
-            dh = ParallelDataHandling(blocks, dim=2)
-            rho = ldc_setup(data_handling=dh, config=config)
-            results.append(rho)
-        else:
-            rho = ldc_setup(domain_size=(10, 15), parallel=False, config=config)
-            results.append(rho)
-    for i, arr in enumerate(results[1:]):
-        print(f"Testing equivalence version 0 with version {i + 1}")
-        np.testing.assert_almost_equal(results[0], arr)
-
-
 def test_data_handling_2d():
     print("--- LDC 2D test ---")
     results = []
diff --git a/lbmpy_tests/test_poisuille_channel.py b/lbmpy_tests/test_poisuille_channel.py
index d753d4de..354b2057 100644
--- a/lbmpy_tests/test_poisuille_channel.py
+++ b/lbmpy_tests/test_poisuille_channel.py
@@ -10,15 +10,11 @@ import pystencils as ps
 from poiseuille import poiseuille_channel
 
 
-@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL))
+@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
 @pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19))
 def test_poiseuille_channel(target, stencil_name):
-    # OpenCL and Cuda
-    if target == ps.Target.OPENCL:
-        import pytest
-        pytest.importorskip("pyopencl")
-        import pystencils.opencl.autoinit
-    elif target == ps.Target.GPU:
+    # Skip the GPU variant when pycuda is not installed
+    if target == ps.Target.GPU:
         import pytest
         pytest.importorskip("pycuda")
 
diff --git a/lbmpy_tests/test_shear_flow.py b/lbmpy_tests/test_shear_flow.py
index a7c4f24a..ccb7fc20 100644
--- a/lbmpy_tests/test_shear_flow.py
+++ b/lbmpy_tests/test_shear_flow.py
@@ -60,14 +60,11 @@ shear_velocity = 0.2  # scale by width to keep stable
 t_max = 2000
 
 
-@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL))
+@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
 @pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19))
 def test_shear_flow(target, stencil_name):
-    # OpenCL and Cuda
-    if target == ps.Target.OPENCL:
-        pytest.importorskip("pyopencl")
-        import pystencils.opencl.autoinit
-    elif target == ps.Target.GPU:
+    # Skip the GPU variant when pycuda is not installed
+    if target == ps.Target.GPU:
         pytest.importorskip("pycuda")
 
     # LB parameters
-- 
GitLab