diff --git a/pystencils/datahandling/__init__.py b/pystencils/datahandling/__init__.py
index 583f121b09ffbfdc6dc2103ec6e300e639791891..a4fa55bdc7a52e1b9c2015e2210fcbb48aaeb2e1 100644
--- a/pystencils/datahandling/__init__.py
+++ b/pystencils/datahandling/__init__.py
@@ -20,7 +20,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
                          default_layout: str = 'SoA',
                          default_target: str = 'cpu',
                          parallel: bool = False,
-                         default_ghost_layers: int = 1) -> DataHandling:
+                         default_ghost_layers: int = 1,
+                         opencl_queue=None) -> DataHandling:
     """Creates a data handling instance.
 
     Args:
@@ -33,6 +34,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
         default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
+        opencl_queue: OpenCL queue to use for 'opencl' targets; only supported by the serial data handling
     """
     if parallel:
+        assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
         if wlb is None:
             raise ValueError("Cannot create parallel data handling because walberla module is not available")
 
@@ -56,8 +59,12 @@ def create_data_handling(domain_size: Tuple[int, ...],
         return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
                                     default_layout=default_layout, default_ghost_layers=default_ghost_layers)
     else:
-        return SerialDataHandling(domain_size, periodicity=periodicity, default_target=default_target,
-                                  default_layout=default_layout, default_ghost_layers=default_ghost_layers)
+        return SerialDataHandling(domain_size,
+                                  periodicity=periodicity,
+                                  default_target=default_target,
+                                  default_layout=default_layout,
+                                  default_ghost_layers=default_ghost_layers,
+                                  opencl_queue=opencl_queue)
 
 
 __all__ = ['create_data_handling']
diff --git a/pystencils/datahandling/pycuda.py b/pystencils/datahandling/pycuda.py
new file mode 100644
index 0000000000000000000000000000000000000000..30602a30ce6b87d0e25861b43c5291cda77ed570
--- /dev/null
+++ b/pystencils/datahandling/pycuda.py
@@ -0,0 +1,40 @@
+try:
+    import pycuda.gpuarray as gpuarray
+except ImportError:
+    gpuarray = None
+import numpy as np
+
+import pystencils
+
+
+class PyCudaArrayHandler:
+
+    def __init__(self):
+        import pycuda.autoinit  # NOQA
+
+    def zeros(self, shape, dtype=np.float64, order='C'):
+        return gpuarray.zeros(shape, dtype, order)
+
+    def ones(self, shape, dtype=np.float64, order='C'):
+        # fill on the host and transfer: gpuarray.ones is not available in all pycuda versions
+        return self.to_gpu(np.ones(shape, dtype, order))
+
+    def empty(self, shape, dtype=np.float64, layout=None):
+        if layout:
+            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
+            return self.to_gpu(cpu_array)
+        else:
+            return gpuarray.empty(shape, dtype)
+
+    def to_gpu(self, array):
+        return gpuarray.to_gpu(array)
+
+    def upload(self, gpu_array, numpy_array):
+        gpu_array.set(numpy_array)
+
+    def download(self, gpu_array, numpy_array):
+        gpu_array.get(numpy_array)
+
+    def randn(self, shape, dtype=np.float64):
+        cpu_array = np.random.randn(*shape).astype(dtype)
+        return self.to_gpu(cpu_array)
diff --git a/pystencils/datahandling/pyopencl.py b/pystencils/datahandling/pyopencl.py
new file mode 100644
index 0000000000000000000000000000000000000000..05470ea9fec1b1128e88c9226d9a53a9ac1556d4
--- /dev/null
+++ b/pystencils/datahandling/pyopencl.py
@@ -0,0 +1,45 @@
+try:
+    import pyopencl.array as gpuarray
+except ImportError:
+    gpuarray = None
+
+import numpy as np
+
+import pystencils
+
+
+class PyOpenClArrayHandler:
+
+    def __init__(self, queue):
+        if not queue:
+            from pystencils.opencl.opencljit import get_global_cl_queue
+            queue = get_global_cl_queue()
+        assert queue, "OpenCL queue missing"
+        self.queue = queue
+
+    def zeros(self, shape, dtype=np.float64, order='C'):
+        return gpuarray.zeros(self.queue, shape, dtype, order)
+
+    def ones(self, shape, dtype=np.float64, order='C'):
+        # fill on the host and transfer: pyopencl.array provides no ones() in all versions
+        return self.to_gpu(np.ones(shape, dtype, order))
+
+    def empty(self, shape, dtype=np.float64, layout=None):
+        if layout:
+            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
+            return self.to_gpu(cpu_array)
+        else:
+            return gpuarray.empty(self.queue, shape, dtype)
+
+    def to_gpu(self, array):
+        return gpuarray.to_device(self.queue, array)
+
+    def upload(self, gpu_array, numpy_array):
+        gpu_array.set(numpy_array, self.queue)
+
+    def download(self, gpu_array, numpy_array):
+        gpu_array.get(self.queue, numpy_array)
+
+    def randn(self, shape, dtype=np.float64):
+        cpu_array = np.random.randn(*shape).astype(dtype)
+        return self.to_gpu(cpu_array)
diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py
index 697b6b6674a83b6786c8a6a3f687df3e49cd8a01..248b1e611b372e49094f925f510658c46c36bf15 100644
--- a/pystencils/datahandling/serial_datahandling.py
+++ b/pystencils/datahandling/serial_datahandling.py
@@ -6,22 +6,24 @@ import numpy as np
 
 from pystencils.datahandling.blockiteration import SerialBlock
 from pystencils.datahandling.datahandling_interface import DataHandling
+from pystencils.datahandling.pycuda import PyCudaArrayHandler
+from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
 from pystencils.field import (
     Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
     spatial_layout_string_to_tuple)
 from pystencils.slicing import normalize_slice, remove_ghost_layers
 from pystencils.utils import DotDict
 
-try:
-    import pycuda.gpuarray as gpuarray
-    import pycuda.autoinit  # NOQA
-except ImportError:
-    gpuarray = None
-
 
 class SerialDataHandling(DataHandling):
 
-    def __init__(self, domain_size: Sequence[int], default_ghost_layers: int = 1, default_layout: str = 'SoA',
-                 periodicity: Union[bool, Sequence[bool]] = False, default_target: str = 'cpu') -> None:
+    def __init__(self,
+                 domain_size: Sequence[int],
+                 default_ghost_layers: int = 1,
+                 default_layout: str = 'SoA',
+                 periodicity: Union[bool, Sequence[bool]] = False,
+                 default_target: str = 'cpu',
+                 opencl_queue=None,
+                 array_handler=None) -> None:
         """
         Creates a data handling for single node simulations.
@@ -43,6 +45,18 @@ class SerialDataHandling(DataHandling):
         self.custom_data_gpu = DotDict()
         self._custom_data_transfer_functions = {}
 
+        if array_handler:
+            self.array_handler = array_handler
+        else:
+            try:
+                self.array_handler = PyCudaArrayHandler()
+            except Exception:
+                self.array_handler = None
+
+        if default_target == 'opencl' or opencl_queue:
+            default_target = 'gpu'
+            self.array_handler = PyOpenClArrayHandler(opencl_queue)
+
         if periodicity is None or periodicity is False:
             periodicity = [False] * self.dim
         if periodicity is True:
@@ -126,7 +140,7 @@ class SerialDataHandling(DataHandling):
         if gpu:
             if name in self.gpu_arrays:
                 raise ValueError("GPU Field with this name already exists")
-            self.gpu_arrays[name] = gpuarray.to_gpu(cpu_arr)
+            self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
 
         assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
@@ -222,7 +236,7 @@ class SerialDataHandling(DataHandling):
             self.to_gpu(name)
 
     def run_kernel(self, kernel_function, **kwargs):
-        arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays
+        arrays = self.gpu_arrays if kernel_function.ast.backend in ('gpucuda', 'opencl') else self.cpu_arrays
         kernel_function(**arrays, **kwargs)
 
     def get_kernel_kwargs(self, kernel_function, **kwargs):
@@ -236,14 +250,14 @@ class SerialDataHandling(DataHandling):
             transfer_func = self._custom_data_transfer_functions[name][1]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].get(self.cpu_arrays[name])
+            self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
 
     def to_gpu(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][0]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].set(self.cpu_arrays[name])
+            self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
 
     def is_on_gpu(self, name):
         return name in self.gpu_arrays
diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py
index cdee059c218a67ad9adc824f41fa09ae3df2f19b..1af1a68e3bfdb3681e05443c2412d8bd24903b0d 100644
--- a/pystencils_tests/test_datahandling.py
+++ b/pystencils_tests/test_datahandling.py
@@ -6,6 +6,13 @@ import numpy as np
 import pystencils as ps
 from pystencils import create_data_handling, create_kernel
 
+try:
+    import pytest
+except ImportError:
+    import unittest.mock
+    pytest = unittest.mock.MagicMock()
+
+
 def basic_iteration(dh):
     dh.add_array('basic_iter_test_gl_default')
@@ -100,15 +107,9 @@ def synchronization(dh, test_gpu=False):
                 np.testing.assert_equal(42, b[field_name])
 
 
-def kernel_execution_jacobi(dh, test_gpu=False):
-    if test_gpu:
-        try:
-            from pycuda import driver
-            import pycuda.autoinit
-        except ImportError:
-            print("Skipping kernel_execution_jacobi GPU version, because pycuda not available")
-            return
+def kernel_execution_jacobi(dh, target):
+    test_gpu = target in ('gpu', 'opencl')
 
     dh.add_array('f', gpu=test_gpu)
     dh.add_array('tmp', gpu=test_gpu)
     stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
@@ -119,7 +120,7 @@ def kernel_execution_jacobi(dh, target):
     @ps.kernel
     def jacobi():
         dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
 
-    kernel = create_kernel(jacobi, target='gpu' if test_gpu else 'cpu').compile()
+    kernel = create_kernel(jacobi, target).compile()
     for b in dh.iterate(ghost_layers=1):
         b['f'].fill(42)
     dh.run_kernel(kernel)
@@ -196,17 +197,32 @@ def test_access_and_gather():
 def test_kernel():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-        kernel_execution_jacobi(dh, test_gpu=True)
+        kernel_execution_jacobi(dh, 'cpu')
         reduction(dh)
 
         try:
             import pycuda
             dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-            kernel_execution_jacobi(dh, test_gpu=False)
+            kernel_execution_jacobi(dh, 'gpu')
         except ImportError:
             pass
 
 
+@pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
+def test_kernel_param(target):
+    for domain_shape in [(4, 5), (3, 4, 5)]:
+        if target == 'gpu':
+            pytest.importorskip('pycuda')
+        if target == 'opencl':
+            pytest.importorskip('pyopencl')
+            from pystencils.opencl.opencljit import init_globally
+            init_globally()
+
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
+        kernel_execution_jacobi(dh, target)
+        reduction(dh)
+
+
 def test_vtk_output():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
diff --git a/pystencils_tests/test_datahandling_parallel.py b/pystencils_tests/test_datahandling_parallel.py
index 52ccb6bce34b2ca115b19a8d00fa31da622b8ff4..0bfbfbd02fc86952e7fba84ce6aa080d2080ec0a 100644
--- a/pystencils_tests/test_datahandling_parallel.py
+++ b/pystencils_tests/test_datahandling_parallel.py
@@ -50,13 +50,13 @@ def test_kernel():
     # 3D
     blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks)
-    kernel_execution_jacobi(dh, test_gpu=gpu)
+    kernel_execution_jacobi(dh, 'gpu' if gpu else 'cpu')
     reduction(dh)
 
     # 2D
     blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks, dim=2)
-    kernel_execution_jacobi(dh, test_gpu=gpu)
+    kernel_execution_jacobi(dh, 'gpu' if gpu else 'cpu')
     reduction(dh)
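
A minimal usage sketch of the new OpenCL path, mirroring the flow of test_kernel_param above. It assumes pyopencl is installed and an OpenCL device is available; the domain size and field names are illustrative only.

    import pystencils as ps
    from pystencils import create_data_handling, create_kernel
    from pystencils.opencl.opencljit import init_globally

    init_globally()  # set up a global OpenCL context and queue for the JIT

    # With default_target='opencl' and no explicit queue, SerialDataHandling
    # falls back to the global queue via PyOpenClArrayHandler.
    dh = create_data_handling(domain_size=(4, 5), periodicity=True, default_target='opencl')
    dh.add_array('f', gpu=True)
    dh.add_array('tmp', gpu=True)

    stencil = [(1, 0), (-1, 0), (0, 1), (0, -1)]

    @ps.kernel
    def jacobi():
        dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)

    kernel = create_kernel(jacobi, 'opencl').compile()
    for b in dh.iterate(ghost_layers=1):
        b['f'].fill(42)
    dh.run_kernel(kernel)   # runs on the OpenCL device, since the arrays live there
    dh.all_to_cpu()         # copy results back through PyOpenClArrayHandler.download

Alternatively, an explicit queue can be passed as create_data_handling(..., opencl_queue=queue); SerialDataHandling then routes all device arrays through a PyOpenClArrayHandler bound to that queue and treats the target as 'gpu'.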