Skip to content
Snippets Groups Projects
Commit ff03108b authored by Stephan Seitz's avatar Stephan Seitz
Browse files

Extend SerialDataHandling to handle alternative GPU libraries

parent 65c8db0a
Branches
No related merge requests found
......@@ -20,7 +20,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_layout: str = 'SoA',
default_target: str = 'cpu',
parallel: bool = False,
default_ghost_layers: int = 1) -> DataHandling:
default_ghost_layers: int = 1,
opencl_queue=None) -> DataHandling:
"""Creates a data handling instance.
Args:
......@@ -33,6 +34,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
"""
if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available")
......@@ -56,8 +58,12 @@ def create_data_handling(domain_size: Tuple[int, ...],
return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
default_layout=default_layout, default_ghost_layers=default_ghost_layers)
else:
return SerialDataHandling(domain_size, periodicity=periodicity, default_target=default_target,
default_layout=default_layout, default_ghost_layers=default_ghost_layers)
return SerialDataHandling(domain_size,
periodicity=periodicity,
default_target=default_target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
opencl_queue=opencl_queue)
__all__ = ['create_data_handling']
try:
import pycuda.gpuarray as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyCudaArrayHandler:
def __init__(self):
import pycuda.autoinit # NOQA
def zeros(self, shape, dtype=np.float64, order='C'):
return gpuarray.zeros(shape, dtype, order)
def ones(self, shape, dtype, order='C'):
return gpuarray.ones(shape, dtype, order)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
return self.from_numpy(cpu_array)
else:
return gpuarray.empty(shape, dtype)
def to_gpu(self, array):
return gpuarray.to_gpu(array)
def upload(self, gpuarray, numpy_array):
gpuarray.set(numpy_array)
def download(self, gpuarray, numpy_array):
gpuarray.get(numpy_array)
def randn(self, shape, dtype=np.float64):
cpu_array = np.random.randn(*shape).astype(dtype)
return self.from_numpy(cpu_array)
try:
import pyopencl.array as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyOpenClArrayHandler:
def __init__(self, queue):
if not queue:
from pystencils.opencl.opencljit import get_global_cl_queue
queue = get_global_cl_queue()
assert queue, "OpenCL queue missing"
self.queue = queue
def zeros(self, shape, dtype=np.float64, order='C'):
return gpuarray.zeros(shape, dtype, order)
def ones(self, shape, dtype, order='C'):
return gpuarray.ones(self.queue, shape, dtype, order)
def empty(self, shape, dtype=np.float64, layout=None):
if layout:
cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
return self.from_numpy(cpu_array)
else:
return gpuarray.empty(self.queue, shape, dtype)
def to_gpu(self, array):
return gpuarray.to_device(self.queue, array)
def upload(self, gpuarray, numpy_array):
gpuarray.set(numpy_array, self.queue)
def download(self, gpuarray, numpy_array):
gpuarray.get(self.queue, numpy_array)
def randn(self, shape, dtype=np.float64):
cpu_array = np.random.randn(*shape).astype(dtype)
return self.from_numpy(cpu_array)
......@@ -6,22 +6,24 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.field import (
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, spatial_layout_string_to_tuple)
from pystencils.slicing import normalize_slice, remove_ghost_layers
from pystencils.utils import DotDict
try:
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # NOQA
except ImportError:
gpuarray = None
class SerialDataHandling(DataHandling):
def __init__(self, domain_size: Sequence[int], default_ghost_layers: int = 1, default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False, default_target: str = 'cpu') -> None:
def __init__(self,
domain_size: Sequence[int],
default_ghost_layers: int = 1,
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: str = 'cpu',
opencl_queue=None,
array_handler=None) -> None:
"""
Creates a data handling for single node simulations.
......@@ -43,6 +45,18 @@ class SerialDataHandling(DataHandling):
self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {}
if array_handler:
self.array_handler = array_handler
else:
try:
self.array_handler = PyCudaArrayHandler()
except Exception:
self.array_handler = None
if default_target == 'opencl' or opencl_queue:
default_target = 'gpu'
self.array_handler = PyOpenClArrayHandler(opencl_queue)
if periodicity is None or periodicity is False:
periodicity = [False] * self.dim
if periodicity is True:
......@@ -125,7 +139,7 @@ class SerialDataHandling(DataHandling):
if gpu:
if name in self.gpu_arrays:
raise ValueError("GPU Field with this name already exists")
self.gpu_arrays[name] = gpuarray.to_gpu(cpu_arr)
self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
......@@ -221,7 +235,8 @@ class SerialDataHandling(DataHandling):
self.to_gpu(name)
def run_kernel(self, kernel_function, **kwargs):
arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays
arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' \
or kernel_function.ast.backend == 'opencl' else self.cpu_arrays
kernel_function(**arrays, **kwargs)
def get_kernel_kwargs(self, kernel_function, **kwargs):
......@@ -235,14 +250,14 @@ class SerialDataHandling(DataHandling):
transfer_func = self._custom_data_transfer_functions[name][1]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else:
self.gpu_arrays[name].get(self.cpu_arrays[name])
self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
def to_gpu(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else:
self.gpu_arrays[name].set(self.cpu_arrays[name])
self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
def is_on_gpu(self, name):
return name in self.gpu_arrays
......
......@@ -6,6 +6,13 @@ import numpy as np
import pystencils as ps
from pystencils import create_data_handling, create_kernel
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
def basic_iteration(dh):
dh.add_array('basic_iter_test_gl_default')
......@@ -100,15 +107,9 @@ def synchronization(dh, test_gpu=False):
np.testing.assert_equal(42, b[field_name])
def kernel_execution_jacobi(dh, test_gpu=False):
if test_gpu:
try:
from pycuda import driver
import pycuda.autoinit
except ImportError:
print("Skipping kernel_execution_jacobi GPU version, because pycuda not available")
return
def kernel_execution_jacobi(dh, target):
test_gpu = target == 'gpu' or target == 'opencl'
dh.add_array('f', gpu=test_gpu)
dh.add_array('tmp', gpu=test_gpu)
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
......@@ -119,7 +120,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
def jacobi():
dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
kernel = create_kernel(jacobi, target='gpu' if test_gpu else 'cpu').compile()
kernel = create_kernel(jacobi, target).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
......@@ -196,17 +197,32 @@ def test_access_and_gather():
def test_kernel():
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
kernel_execution_jacobi(dh, test_gpu=True)
kernel_execution_jacobi(dh, 'cpu')
reduction(dh)
try:
import pycuda
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
kernel_execution_jacobi(dh, test_gpu=False)
kernel_execution_jacobi(dh, 'gpu')
except ImportError:
pass
@pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
def test_kernel_param(target):
for domain_shape in [(4, 5), (3, 4, 5)]:
if target == 'gpu':
pytest.importorskip('pycuda')
if target == 'opencl':
pytest.importorskip('pyopencl')
from pystencils.opencl.opencljit import init_globally
init_globally()
dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
......
......@@ -50,13 +50,13 @@ def test_kernel():
# 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks)
kernel_execution_jacobi(dh, test_gpu=gpu)
kernel_execution_jacobi(dh, 'gpu')
reduction(dh)
# 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2)
kernel_execution_jacobi(dh, test_gpu=gpu)
kernel_execution_jacobi(dh, 'gpu')
reduction(dh)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment