Commit 116b46aa authored by Markus Holzer, committed by Jan Hönig
Browse files

Remove OpenCL support

parent 358ebb11
# Change Log
## Unreleased
### Removed
* Removed OpenCL support because pystencils no longer supports it
......@@ -104,8 +104,7 @@ def get_communication_slices(
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
domain_size=None, target=Target.GPU,
opencl_queue=None, opencl_ctx=None):
domain_size=None, target=Target.GPU):
"""Copies a rectangular array slice onto another non-overlapping array slice"""
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
......@@ -136,9 +135,6 @@ def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
if target == Target.GPU:
from pystencils.gpucuda import make_python_function
return make_python_function(ast)
elif target == Target.OPENCL:
from pystencils.opencl import make_python_function
return make_python_function(ast, opencl_queue, opencl_ctx)
else:
raise ValueError('Invalid target:', target)
......@@ -147,22 +143,17 @@ class LBMPeriodicityHandling:
def __init__(self, stencil, data_handling, pdf_field_name,
streaming_pattern='pull', ghost_layers=1,
opencl_queue=None, opencl_ctx=None,
pycuda_direct_copy=True):
"""
Periodicity Handling for Lattice Boltzmann Streaming.
**On the usage with cuda/opencl:**
**On the usage with cuda:**
- pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
but especially for large stencils like D3Q27, their compilation can take up to 20 seconds.
Choose your weapon depending on your use case.
- pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled
copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears
to be about four times faster.
"""
if not isinstance(data_handling, SerialDataHandling):
raise ValueError('Only serial data handling is supported!')
......@@ -172,7 +163,7 @@ class LBMPeriodicityHandling:
self.dh = data_handling
target = data_handling.default_target
assert target in [Target.CPU, Target.GPU, Target.OPENCL]
assert target in [Target.CPU, Target.GPU]
self.pdf_field_name = pdf_field_name
self.ghost_layers = ghost_layers
......@@ -180,8 +171,6 @@ class LBMPeriodicityHandling:
self.inplace_pattern = is_inplace(streaming_pattern)
self.target = target
self.cpu = target == Target.CPU
self.opencl_queue = opencl_queue
self.opencl_ctx = opencl_ctx
self.pycuda_direct_copy = target == Target.GPU and pycuda_direct_copy
def is_copy_direction(direction):
......@@ -205,7 +194,7 @@ class LBMPeriodicityHandling:
ghost_layers=ghost_layers)
self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
if target == Target.OPENCL or (target == Target.GPU and not pycuda_direct_copy):
if target == Target.GPU and not pycuda_direct_copy:
self.device_copy_kernels = []
for timestep in timesteps:
self.device_copy_kernels.append(self._compile_copy_kernels(timestep))
......@@ -227,9 +216,7 @@ class LBMPeriodicityHandling:
kernels = []
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(
pdf_field, src, dst, target=self.target,
opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
periodic_pdf_copy_kernel(pdf_field, src, dst, target=self.target))
return kernels
def _periodicity_handling_gpu(self, prev_timestep):
......
......@@ -74,7 +74,7 @@ class LatticeBoltzmannStep:
self.density_data_name = name + "_density" if density_data_name is None else density_data_name
self.density_data_index = density_data_index
self._gpu = target == Target.GPU or target == Target.OPENCL
self._gpu = target == Target.GPU
layout = lbm_optimisation.field_layout
alignment = False
......
......@@ -25,27 +25,15 @@ try:
except Exception:
pass
try:
import pystencils.opencl.autoinit
from pystencils.opencl.opencljit import get_global_cl_queue
if get_global_cl_queue() is not None:
targets += [Target.OPENCL]
except Exception:
pass
@pytest.mark.parametrize('target', targets)
@pytest.mark.parametrize('stencil', [Stencil.D2Q9, Stencil.D3Q19, Stencil.D3Q27])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.longrun
def test_fully_periodic_flow(target, stencil, streaming_pattern):
if target == Target.OPENCL:
opencl_queue = get_global_cl_queue()
else:
opencl_queue = None
gpu = target in [Target.GPU, Target.OPENCL]
gpu = False
if target == Target.GPU:
gpu = True
# Stencil
stencil = LBStencil(stencil)
......@@ -59,8 +47,7 @@ def test_fully_periodic_flow(target, stencil, streaming_pattern):
domain_size = (30,) * stencil.D
periodicity = (True,) * stencil.D
dh = create_data_handling(domain_size=domain_size, periodicity=periodicity,
default_target=target, opencl_queue=opencl_queue)
dh = create_data_handling(domain_size=domain_size, periodicity=periodicity, default_target=target)
pdfs = dh.add_array('pdfs', stencil.Q)
if not inplace:
......
......@@ -26,14 +26,6 @@ try:
except Exception:
pass
try:
import pystencils.opencl.autoinit
from pystencils.opencl.opencljit import get_global_cl_queue
if get_global_cl_queue() is not None:
targets += [Target.OPENCL]
except Exception:
pass
class PeriodicPipeFlow:
def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU):
......@@ -42,7 +34,7 @@ class PeriodicPipeFlow:
wall_boundary = NoSlip()
self.target = target
self.gpu = target in [Target.GPU, Target.OPENCL]
self.gpu = target in [Target.GPU]
# Stencil
self.stencil = stencil
......
......@@ -4,7 +4,7 @@ import pytest
from lbmpy.boundaries import NoSlip, UBB, SimpleExtrapolationOutflow, ExtrapolationOutflow, \
FixedDensity, DiffusionDirichlet, NeumannByCopy, StreamInConstant, FreeSlip
from lbmpy.boundaries.boundaryhandling import LatticeBoltzmannBoundaryHandling
from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig, LBMOptimisation
from lbmpy.creationfunctions import create_lb_function, create_lb_method, LBMConfig
from lbmpy.enums import Stencil, Method
from lbmpy.geometry import add_box_boundary
from lbmpy.lbstep import LatticeBoltzmannStep
......@@ -22,22 +22,18 @@ def mirror_stencil(direction, mirror_axis):
return tuple(direction)
@pytest.mark.parametrize("target", [Target.GPU, Target.CPU, Target.OPENCL])
@pytest.mark.parametrize("target", [Target.GPU, Target.CPU])
def test_simple(target):
if target == Target.GPU:
import pytest
pytest.importorskip('pycuda')
elif target == Target.OPENCL:
import pytest
pytest.importorskip('pyopencl')
import pystencils.opencl.autoinit
dh = create_data_handling((4, 4), parallel=False, default_target=target)
dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU)
for i in range(9):
dh.fill("pdfs", i, value_idx=i, ghost_layers=True)
if target == Target.GPU or target == Target.OPENCL:
if target == Target.GPU:
dh.all_to_gpu()
lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), compressible=False, relaxation_rate=1.8)
......@@ -57,7 +53,7 @@ def test_simple(target):
bh.prepare()
bh()
if target == Target.GPU or target == Target.OPENCL:
if target == Target.GPU:
dh.all_to_cpu()
# left lower corner
assert (dh.cpu_arrays['pdfs'][0, 0, 6] == 7)
......
from hashlib import sha256
from pystencils import Backend, CreateKernelConfig, Target
from lbmpy.creationfunctions import create_lb_ast
from lbmpy.enums import Stencil, Method
from lbmpy.creationfunctions import LBMConfig
from lbmpy.stencils import LBStencil
def test_hash_equivalence_llvm():
    """Compare the generated LLVM code of a D2Q9 SRT kernel against a pinned SHA-256 digest.

    The test is skipped when ``llvmlite`` cannot be imported.
    """
    import pytest
    pytest.importorskip("llvmlite")
    from pystencils.llvm.llvmjit import generate_llvm

    # Reference digest of the stringified LLVM code; any change in the
    # generated code makes the final comparison fail.
    expected_digest = "f1b1879e304fe8533977c885f2744516dd4964064a7e4ae64fd94b8426d995bb"

    method_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), method=Method.SRT)
    kernel_config = CreateKernelConfig(target=Target.CPU, backend=Backend.LLVM)

    kernel_ast = create_lb_ast(lbm_config=method_config, config=kernel_config)
    llvm_text = str(generate_llvm(kernel_ast))

    actual_digest = sha256(llvm_text.encode()).hexdigest()
    assert actual_digest == expected_digest
......@@ -50,40 +50,6 @@ def test_data_handling_3d():
np.testing.assert_almost_equal(results[0], arr)
def test_data_handling_2d_opencl():
    """Run the 2D LDC setup once per available target and check that all runs agree.

    The test is skipped when ``pyopencl`` cannot be imported. After calling
    ``init_globally()`` on the OpenCL JIT module, ``ldc_setup`` is executed on
    the GPU target (only if ``gpu_available``) and on the CPU target; every
    later result is compared against the first one.
    """
    pytest.importorskip('pyopencl')
    import pystencils.opencl.opencljit
    pystencils.opencl.opencljit.init_globally()
    print("--- LDC 2D test ---")

    # Since waLBerla has no OpenCL Backend yet, it is not possible to use the
    # parallel Datahandling with OpenCL at the moment
    # TODO: Activate parallel Datahandling if Backend is available
    parallel = False

    gpu_choices = [True, False] if gpu_available else [False]
    collected = []
    for gpu in gpu_choices:
        # Parallel GPU runs need a waLBerla build that exposes a `cuda` attribute.
        if parallel and gpu and not hasattr(wLB, 'cuda'):
            continue

        print(f"Testing parallel: {parallel}\tgpu: {gpu}")
        selected_target = Target.GPU if gpu else Target.CPU
        config = CreateKernelConfig(target=selected_target,
                                    gpu_indexing_params=MappingProxyType({'block_size': (8, 4, 2)}))

        if not parallel:
            rho = ldc_setup(domain_size=(10, 15), parallel=False, config=config)
        else:
            from pystencils.datahandling import ParallelDataHandling
            blocks = wLB.createUniformBlockGrid(blocks=(2, 3, 1), cellsPerBlock=(5, 5, 1),
                                                oneBlockPerProcess=False)
            dh = ParallelDataHandling(blocks, dim=2)
            rho = ldc_setup(data_handling=dh, config=config)
        collected.append(rho)

    # The first run is the reference for all remaining ones.
    for i, arr in enumerate(collected[1:]):
        print(f"Testing equivalence version 0 with version {i + 1}")
        np.testing.assert_almost_equal(collected[0], arr)
def test_data_handling_2d():
print("--- LDC 2D test ---")
results = []
......
......@@ -10,15 +10,11 @@ import pystencils as ps
from poiseuille import poiseuille_channel
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL))
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19))
def test_poiseuille_channel(target, stencil_name):
# OpenCL and Cuda
if target == ps.Target.OPENCL:
import pytest
pytest.importorskip("pyopencl")
import pystencils.opencl.autoinit
elif target == ps.Target.GPU:
# Cuda
if target == ps.Target.GPU:
import pytest
pytest.importorskip("pycuda")
......
......@@ -60,14 +60,11 @@ shear_velocity = 0.2 # scale by width to keep stable
t_max = 2000
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU, ps.Target.OPENCL))
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@pytest.mark.parametrize('stencil_name', (Stencil.D2Q9, Stencil.D3Q19))
def test_shear_flow(target, stencil_name):
# OpenCL and Cuda
if target == ps.Target.OPENCL:
pytest.importorskip("pyopencl")
import pystencils.opencl.autoinit
elif target == ps.Target.GPU:
# Cuda
if target == ps.Target.GPU:
pytest.importorskip("pycuda")
# LB parameters
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment