From 116b46aab39da6f9fcf76fa78461b58b3dcd44a7 Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Mon, 22 Nov 2021 22:30:23 +0000 Subject: [PATCH] Remove OpenCL support --- CHANGELOG.md | 6 ++++ lbmpy/advanced_streaming/communication.py | 23 +++---------- lbmpy/lbstep.py | 2 +- .../test_fully_periodic_flow.py | 21 +++--------- .../test_periodic_pipe_with_force.py | 10 +----- lbmpy_tests/test_boundary_handling.py | 12 +++---- lbmpy_tests/test_code_hashequivalence.py | 22 ------------ lbmpy_tests/test_lbstep.py | 34 ------------------- lbmpy_tests/test_poisuille_channel.py | 10 ++---- lbmpy_tests/test_shear_flow.py | 9 ++--- 10 files changed, 27 insertions(+), 122 deletions(-) create mode 100644 CHANGELOG.md delete mode 100644 lbmpy_tests/test_code_hashequivalence.py diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..f0202916 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,6 @@ +# Change Log + +## Unreleased + +### Removed +* Removing OpenCL support because it is not supported by pystencils anymore diff --git a/lbmpy/advanced_streaming/communication.py b/lbmpy/advanced_streaming/communication.py index 5d67f68d..2345b32a 100644 --- a/lbmpy/advanced_streaming/communication.py +++ b/lbmpy/advanced_streaming/communication.py @@ -104,8 +104,7 @@ def get_communication_slices( def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice, - domain_size=None, target=Target.GPU, - opencl_queue=None, opencl_ctx=None): + domain_size=None, target=Target.GPU): """Copies a rectangular array slice onto another non-overlapping array slice""" from pystencils.gpucuda.kernelcreation import create_cuda_kernel @@ -136,9 +135,6 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice, if target == Target.GPU: from pystencils.gpucuda import make_python_function return make_python_function(ast) - elif target == Target.OPENCL: - from pystencils.opencl import make_python_function - return make_python_function(ast, opencl_queue, opencl_ctx) else: raise ValueError('Invalid target:', target) @@ -147,22 +143,17 @@ class LBMPeriodicityHandling: def __init__(self, stencil, data_handling, pdf_field_name, streaming_pattern='pull', ghost_layers=1, - opencl_queue=None, opencl_ctx=None, pycuda_direct_copy=True): """ Periodicity Handling for Lattice Boltzmann Streaming. - **On the usage with cuda/opencl:** + **On the usage with cuda:** - pycuda allows the copying of sliced arrays within device memory using the numpy syntax, e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying, but especially for large stencils like D3Q27, their compilation can take up to 20 seconds. Choose your weapon depending on your use case. - - - pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled - copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears - to be about four times faster. """ if not isinstance(data_handling, SerialDataHandling): raise ValueError('Only serial data handling is supported!') @@ -172,7 +163,7 @@ class LBMPeriodicityHandling: self.dh = data_handling target = data_handling.default_target - assert target in [Target.CPU, Target.GPU, Target.OPENCL] + assert target in [Target.CPU, Target.GPU] self.pdf_field_name = pdf_field_name self.ghost_layers = ghost_layers @@ -180,8 +171,6 @@ class LBMPeriodicityHandling: self.inplace_pattern = is_inplace(streaming_pattern) self.target = target self.cpu = target == Target.CPU - self.opencl_queue = opencl_queue - self.opencl_ctx = opencl_ctx self.pycuda_direct_copy = target == Target.GPU and pycuda_direct_copy def is_copy_direction(direction): @@ -205,7 +194,7 @@ class LBMPeriodicityHandling: ghost_layers=ghost_layers) self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items()))) - if target == Target.OPENCL or (target == Target.GPU and not pycuda_direct_copy): + if target == Target.GPU and not pycuda_direct_copy: self.device_copy_kernels = [] for timestep in timesteps: self.device_copy_kernels.append(self._compile_copy_kernels(timestep)) @@ -227,9 +216,7 @@ class LBMPeriodicityHandling: kernels = [] for src, dst in self.comm_slices[timestep.idx]: kernels.append( - periodic_pdf_copy_kernel( - pdf_field, src, dst, target=self.target, - opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx)) + periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target)) return kernels def _periodicity_handling_gpu(self, prev_timestep): diff --git a/lbmpy/lbstep.py b/lbmpy/lbstep.py index 15e73fe6..44aea72a 100644 --- a/lbmpy/lbstep.py +++ b/lbmpy/lbstep.py @@ -74,7 +74,7 @@ class LatticeBoltzmannStep: self.density_data_name = name + "_density" if density_data_name is None else density_data_name self.density_data_index = density_data_index - self._gpu = target == Target.GPU or target == Target.OPENCL + self._gpu = target == Target.GPU layout = lbm_optimisation.field_layout alignment = False diff --git a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py index 9804a1e3..0c37cfc3 100644 --- a/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py +++ b/lbmpy_tests/advanced_streaming/test_fully_periodic_flow.py @@ -25,27 +25,15 @@ try: except Exception: pass -try: - import pystencils.opencl.autoinit - from pystencils.opencl.opencljit import get_global_cl_queue - if get_global_cl_queue() is not None: - targets += [Target.OPENCL] -except Exception: - pass - @pytest.mark.parametrize('target', targets) @pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27]) @pytest.mark.parametrize('streaming_pattern', streaming_patterns) @pytest.mark.longrun def test_fully_periodic_flow(target, stencil, streaming_pattern): - - if target == Target.OPENCL: - opencl_queue = get_global_cl_queue() - else: - opencl_queue = None - - gpu = target in [Target.GPU, Target.OPENCL] + gpu = False + if target == Target.GPU: + gpu = True # Stencil stencil = LBStencil(stencil) @@ -59,8 +47,7 @@ def test_fully_periodic_flow(target, stencil, streaming_pattern): domain_size = (30,) * stencil.D periodicity = (True,) * stencil.D - dh = create_data_handling(domain_size=domain_size, periodicity=periodicity, - default_target=target, opencl_queue=opencl_queue) + dh = create_data_handling(domain_size=domain_size, periodicity=periodicity, default_target=target) pdfs = dh.add_array('pdfs', stencil.Q) if not inplace: diff --git a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py index 7ee8a608..42b6671a 100644 --- a/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py +++ b/lbmpy_tests/advanced_streaming/test_periodic_pipe_with_force.py @@ -26,14 +26,6 @@ try: except Exception: pass -try: - import pystencils.opencl.autoinit - from pystencils.opencl.opencljit import get_global_cl_queue - if get_global_cl_queue() is not None: - targets += [Target.OPENCL] -except Exception: - pass - class PeriodicPipeFlow: def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU): @@ -42,7 +34,7 @@ class PeriodicPipeFlow: wall_boundary = NoSlip() self.target = target - self.gpu = target in [Target.GPU, Target.OPENCL] + self.gpu = target in [Target.GPU] # Stencil self.stencil = stencil diff --git a/lbmpy_tests/test_boundary_handling.py b/lbmpy_tests/test_boundary_handling.py index fa285f6b..a60c90fb 100644 --- a/lbmpy_tests/test_boundary_handling.py +++ b/lbmpy_tests/test_boundary_handling.py @@ -4,7 +4,7 @@ import pytest from lbmpy.boundaries import NoSlip, UBB, SimpleExtrapolationOutflow, ExtrapolationOutflow, \ FixedDensity, DiffusionDirichlet, NeumannByCopy, StreamInConstant, FreeSlip from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling -from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig, LBMOptimisation +from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig from lbmpy.enums import Stencil, Method from lbmpy.geometry import add_box_boundary from lbmpy.lbstep import LatticeBoltzmannStep @@ -22,22 +22,18 @@ def mirror_stencil(direction, mirror_axis): return tuple(direction) -@pytest.mark.parametrize("target", [Target.GPU, Target.CPU, Target.OPENCL]) +@pytest.mark.parametrize("target", [Target.GPU, Target.CPU]) def test_simple(target): if target == Target.GPU: import pytest pytest.importorskip('pycuda') - elif target == Target.OPENCL: - import pytest - pytest.importorskip('pyopencl') - import pystencils.opencl.autoinit dh = create_data_handling((4, 4), parallel=False, default_target=target) dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU) for i in range(9): dh.fill("pdfs", i, value_idx=i, ghost_layers=True) - if target == Target.GPU or target == Target.OPENCL: + if target == Target.GPU: dh.all_to_gpu() lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), compressible=False, relaxation_rate=1.8) @@ -57,7 +53,7 @@ def test_simple(target): bh.prepare() bh() - if target == Target.GPU or target == Target.OPENCL: + if target == Target.GPU: dh.all_to_cpu() # left lower corner assert (dh.cpu_arrays['pdfs'][0, 0, 6] == 7) diff --git a/lbmpy_tests/test_code_hashequivalence.py b/lbmpy_tests/test_code_hashequivalence.py deleted file mode 100644 index 06ef0ded..00000000 --- a/lbmpy_tests/test_code_hashequivalence.py +++ /dev/null @@ -1,22 +0,0 @@ -from hashlib import sha256 - -from pystencils import Backend, CreateKernelConfig, Target -from lbmpy.creationfunctions import create_lb_ast -from lbmpy.enums import Stencil, Method -from lbmpy.creationfunctions import LBMConfig -from lbmpy.stencils import LBStencil - - -def test_hash_equivalence_llvm(): - import pytest - pytest.importorskip("llvmlite") - from pystencils.llvm.llvmjit import generate_llvm - - ref_value = "f1b1879e304fe8533977c885f2744516dd4964064a7e4ae64fd94b8426d995bb" - - lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), method=Method.SRT) - config = CreateKernelConfig(target=Target.CPU, backend=Backend.LLVM) - ast = create_lb_ast(lbm_config=lbm_config, config=config) - code = generate_llvm(ast) - hash_value = sha256(str(code).encode()).hexdigest() - assert hash_value == ref_value diff --git a/lbmpy_tests/test_lbstep.py b/lbmpy_tests/test_lbstep.py index e6a6bc63..86131385 100644 --- a/lbmpy_tests/test_lbstep.py +++ b/lbmpy_tests/test_lbstep.py @@ -50,40 +50,6 @@ def test_data_handling_3d(): np.testing.assert_almost_equal(results[0], arr) -def test_data_handling_2d_opencl(): - pytest.importorskip('pyopencl') - import pystencils.opencl.opencljit - pystencils.opencl.opencljit.init_globally() - print("--- LDC 2D test ---") - results = [] - - # Since waLBerla has no OpenCL Backend yet, it is not possible to use the - # parallel Datahandling with OpenCL at the moment - - # TODO: Activate parallel Datahandling if Backend is available - parallel = False - for gpu in [True, False] if gpu_available else [False]: - if parallel and gpu and not hasattr(wLB, 'cuda'): - continue - - print(f"Testing parallel: {parallel}\tgpu: {gpu}") - config = CreateKernelConfig(target=Target.GPU if gpu else Target.CPU, - gpu_indexing_params=MappingProxyType({'block_size': (8, 4, 2)})) - if parallel: - from pystencils.datahandling import ParallelDataHandling - blocks = wLB.createUniformBlockGrid(blocks=(2, 3, 1), cellsPerBlock=(5, 5, 1), - oneBlockPerProcess=False) - dh = ParallelDataHandling(blocks, dim=2) - rho = ldc_setup(data_handling=dh, config=config) - results.append(rho) - else: - rho = ldc_setup(domain_size=(10, 15), parallel=False, config=config) - results.append(rho) - for i, arr in enumerate(results[1:]): - print(f"Testing equivalence version 0 with version {i + 1}") - np.testing.assert_almost_equal(results[0], arr) - - def test_data_handling_2d(): print("--- LDC 2D test ---") results = [] diff --git a/lbmpy_tests/test_poisuille_channel.py b/lbmpy_tests/test_poisuille_channel.py index d753d4de..354b2057 100644 --- a/lbmpy_tests/test_poisuille_channel.py +++ b/lbmpy_tests/test_poisuille_channel.py @@ -10,15 +10,11 @@ import pystencils as ps from poiseuille import poiseuille_channel -@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL)) +@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU)) @pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19)) def test_poiseuille_channel(target, stencil_name): - # OpenCL and Cuda - if target == ps.Target.OPENCL: - import pytest - pytest.importorskip("pyopencl") - import pystencils.opencl.autoinit - elif target == ps.Target.GPU: + # Cuda + if target == ps.Target.GPU: import pytest pytest.importorskip("pycuda") diff --git a/lbmpy_tests/test_shear_flow.py b/lbmpy_tests/test_shear_flow.py index a7c4f24a..ccb7fc20 100644 --- a/lbmpy_tests/test_shear_flow.py +++ b/lbmpy_tests/test_shear_flow.py @@ -60,14 +60,11 @@ shear_velocity = 0.2 # scale by width to keep stable t_max = 2000 -@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL)) +@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU)) @pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19)) def test_shear_flow(target, stencil_name): - # OpenCL and Cuda - if target == ps.Target.OPENCL: - pytest.importorskip("pyopencl") - import pystencils.opencl.autoinit - elif target == ps.Target.GPU: + # Cuda + if target == ps.Target.GPU: pytest.importorskip("pycuda") # LB parameters -- GitLab