Commit 33aa3bdd authored by Martin Bauer

Major data handling and kernel target refactoring

- gpu -> accelerator
- no distinction between target and backend any more
parent 85484500
Pipeline #20034 failed in 1 minute and 53 seconds
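
At a glance, a hedged summary of the renames this commit performs, written as Python comments; every mapping is taken from the hunks below:

    # add_array(..., gpu=...)                     ->  add_array(..., acc=...)
    # to_gpu(name) / all_to_gpu()                 ->  to_acc(name) / all_to_acc()
    # is_on_gpu(name)                             ->  is_on_acc(name)
    # default_target='gpu'                        ->  target='cuda' (or 'opencl', 'llvm_gpu')
    # _GPU_LIKE_TARGETS / _GPU_LIKE_BACKENDS      ->  ACCELERATOR_TARGETS = ['cuda', 'opencl', 'llvm_gpu']
    # KernelFunction(body, target, backend, ...)  ->  KernelFunction(body, target, ...)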
......@@ -164,31 +164,20 @@ class KernelFunction(Node):
def field_name(self):
return self.fields[0].name
def __init__(self, body, target, backend, compile_function, ghost_layers, function_name="kernel"):
def __init__(self, body, target, compile_function, ghost_layers, function_name="kernel"):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
self.function_name = function_name
self._body.parent = self
self.ghost_layers = ghost_layers
self._target = target
self._backend = backend
self.target = target
# these variables are assumed to be global, so no automatic parameter is generated for them
self.global_variables = set()
self.instruction_set = None  # used in `vectorize` function to tell the backend which instruction set (SSE, AVX) to use
# function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function
@property
def target(self):
"""Currently either 'cpu' or 'gpu' """
return self._target
@property
def backend(self):
"""Backend for generating the code e.g. 'llvm', 'c', 'cuda' """
return self._backend
@property
def symbols_defined(self):
return set()
......
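
To illustrate the simplified constructor above, a minimal sketch, assuming Block and KernelFunction are importable from pystencils.astnodes as in the rest of the code base:

    from pystencils.astnodes import Block, KernelFunction

    body = Block([])  # empty kernel body, just to exercise the new signature
    ast = KernelFunction(body, target='cpu', compile_function=None, ghost_layers=None)
    print(ast.target)  # 'cpu' -- now a plain attribute; the separate 'backend' property is gone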
......@@ -43,7 +43,7 @@ class FlagInterface:
raise ValueError("There is already a boundary handling registered at the data handling. "
"If you want to add multiple handling objects, choose a different name.")
self.flag_field = data_handling.add_array(self.flag_field_name, dtype=self.dtype, cpu=True, gpu=False)
self.flag_field = data_handling.add_array(self.flag_field_name, dtype=self.dtype, cpu=True, acc=False)
ff_ghost_layers = data_handling.ghost_layers_of_field(self.flag_field_name)
for b in data_handling.iterate(ghost_layers=ff_ghost_layers):
b[self.flag_field_name].fill(self.domain_flag)
......@@ -87,26 +87,23 @@ class BoundaryHandling:
fi = flag_interface
self.flag_interface = fi if fi is not None else FlagInterface(data_handling, name + "Flags")
gpu = self._target in self._data_handling._GPU_LIKE_TARGETS
class_ = self.IndexFieldBlockData
if self._target == 'opencl':
def opencl_to_device(gpu_version, cpu_version):
from pyopencl import array
gpu_version = gpu_version.boundary_object_to_index_list
cpu_version = cpu_version.boundary_object_to_index_list
for obj, cpu_arr in cpu_version.items():
if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
from pystencils.opencl.opencljit import get_global_cl_queue
queue = self._data_handling._opencl_queue or get_global_cl_queue()
gpu_version[obj] = array.to_device(queue, cpu_arr)
else:
gpu_version[obj].set(cpu_arr)
class_ = type('opencl_class', (self.IndexFieldBlockData,), {
'to_gpu': opencl_to_device
})
data_handling.add_custom_class(self._index_array_name, class_, cpu=True, gpu=gpu)
def to_cpu(gpu_version, cpu_version):
gpu_version = gpu_version.boundary_object_to_index_list
cpu_version = cpu_version.boundary_object_to_index_list
for obj, cpu_arr in cpu_version.items():
gpu_version[obj].get(cpu_arr)
def to_acc(gpu_version, cpu_version):
gpu_version = gpu_version.boundary_object_to_index_list
cpu_version = cpu_version.boundary_object_to_index_list
for obj, cpu_arr in cpu_version.items():
if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
gpu_version[obj] = self.data_handling.array_handler.to_acc(cpu_arr)
else:
self.data_handling.array_handler.upload(gpu_version[obj], cpu_arr)
creation_function = lambda: self.IndexFieldBlockData()
data_handling.add_custom_data(self._index_array_name, creation_function, creation_function, to_acc, to_cpu)
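
The same (acc_instance, cpu_instance) callback pattern works for user-defined data. A hedged sketch, assuming dh is an existing SerialDataHandling exposing the array_handler attribute used above:

    import numpy as np

    def cpu_create():                    # host-side container
        return np.zeros(16, dtype=np.float64)

    def acc_create():                    # device-side placeholder: a mutable one-slot holder
        return [None]

    def cpu_to_acc(acc_inst, cpu_inst):  # parameter order (acc, cpu) per the DataHandling docs
        acc_inst[0] = dh.array_handler.to_acc(cpu_inst)

    def acc_to_cpu(acc_inst, cpu_inst):
        acc_inst[0].get(cpu_inst)        # pycuda/pyopencl device arrays expose .get()

    dh.add_custom_data('my_data', cpu_create, acc_create, cpu_to_acc, acc_to_cpu)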
@property
def data_handling(self):
......@@ -222,7 +219,7 @@ class BoundaryHandling:
if self._dirty:
self.prepare()
for b in self._data_handling.iterate(gpu=self._target in self._data_handling._GPU_LIKE_TARGETS):
for b in self._data_handling.iterate(acc=self._target in self._data_handling.ACCELERATOR_TARGETS):
for b_obj, idx_arr in b[self._index_array_name].boundary_object_to_index_list.items():
kwargs[self._field_name] = b[self._field_name]
kwargs['indexField'] = idx_arr
......@@ -237,7 +234,7 @@ class BoundaryHandling:
if self._dirty:
self.prepare()
for b in self._data_handling.iterate(gpu=self._target in self._data_handling._GPU_LIKE_TARGETS):
for b in self._data_handling.iterate(acc=self._target in self._data_handling.ACCELERATOR_TARGETS):
for b_obj, idx_arr in b[self._index_array_name].boundary_object_to_index_list.items():
arguments = kwargs.copy()
arguments[self._field_name] = b[self._field_name]
......@@ -320,8 +317,8 @@ class BoundaryHandling:
def _boundary_data_initialization(self, boundary_obj, boundary_data_setter, **kwargs):
if boundary_obj.additional_data_init_callback:
boundary_obj.additional_data_init_callback(boundary_data_setter, **kwargs)
if self._target in self._data_handling._GPU_LIKE_TARGETS:
self._data_handling.to_gpu(self._index_array_name)
if self._target in self._data_handling.ACCELERATOR_TARGETS:
self._data_handling.to_acc(self._index_array_name)
class BoundaryInfo(object):
def __init__(self, boundary_obj, flag, kernel):
......@@ -330,7 +327,7 @@ class BoundaryHandling:
self.kernel = kernel
class IndexFieldBlockData:
def __init__(self, *_1, **_2):
def __init__(self):
self.boundary_object_to_index_list = {}
self.boundary_object_to_data_setter = {}
......@@ -338,25 +335,6 @@ class BoundaryHandling:
self.boundary_object_to_index_list.clear()
self.boundary_object_to_data_setter.clear()
@staticmethod
def to_cpu(gpu_version, cpu_version):
gpu_version = gpu_version.boundary_object_to_index_list
cpu_version = cpu_version.boundary_object_to_index_list
for obj, cpu_arr in cpu_version.items():
gpu_version[obj].get(cpu_arr)
@staticmethod
def to_gpu(gpu_version, cpu_version):
from pycuda import gpuarray
gpu_version = gpu_version.boundary_object_to_index_list
cpu_version = cpu_version.boundary_object_to_index_list
for obj, cpu_arr in cpu_version.items():
if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
gpu_version[obj] = gpuarray.to_gpu(cpu_arr)
else:
gpu_version[obj].set(cpu_arr)
class BoundaryDataSetter:
def __init__(self, index_array, offset, stencil, ghost_layers, pdf_array):
......
......@@ -64,7 +64,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
loop_order = get_optimal_loop_ordering(fields_without_buffers)
loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
ghost_layers=ghost_layers, loop_order=loop_order)
ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
ast_node = KernelFunction(loop_node, 'cpu', compile_function=make_python_function,
ghost_layers=ghost_layer_info, function_name=function_name)
implement_interpolations(body)
......@@ -145,7 +145,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body.append(assignment)
function_body = Block([loop_node])
ast_node = KernelFunction(function_body, "cpu", "c", make_python_function,
ast_node = KernelFunction(function_body, "cpu", make_python_function,
ghost_layers=None, function_name=function_name)
fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields}
......
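
For context, a hedged sketch of kernel creation after the change, assuming the top-level pystencils.create_kernel forwards to the functions above:

    import pystencils as ps

    src, dst = ps.fields('src, dst: double[2D]')
    update = [ps.Assignment(dst[0, 0],
                            (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)]
    ast = ps.create_kernel(update, target='cpu')  # one 'target' argument; no 'backend'
    kernel = ast.compile()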
......@@ -18,10 +18,9 @@ except ImportError:
def create_data_handling(domain_size: Tuple[int, ...],
periodicity: Union[bool, Tuple[bool, ...]] = False,
default_layout: str = 'SoA',
default_target: str = 'cpu',
target: str = 'cpu',
parallel: bool = False,
default_ghost_layers: int = 1,
opencl_queue=None) -> DataHandling:
default_ghost_layers: int = 1) -> DataHandling:
"""Creates a data handling instance.
Args:
......@@ -29,12 +28,11 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity
for each coordinate
default_layout: default array layout, that is used if not explicitly specified in 'add_array'
default_target: either 'cpu' or 'gpu'
target: target where code should be run, e.g. 'cpu' or 'cuda' or 'opencl'
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
"""
if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available")
......@@ -55,15 +53,14 @@ def create_data_handling(domain_size: Tuple[int, ...],
# noinspection PyArgumentList
block_storage = wlb.createUniformBlockGrid(cells=domain_size, periodic=periodicity)
return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
return ParallelDataHandling(blocks=block_storage, dim=dim, target=target,
default_layout=default_layout, default_ghost_layers=default_ghost_layers)
else:
return SerialDataHandling(domain_size,
periodicity=periodicity,
default_target=default_target,
target=target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
opencl_queue=opencl_queue)
default_ghost_layers=default_ghost_layers)
__all__ = ['create_data_handling']
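
A hedged usage sketch of the new signature (parameter names taken from the definition above):

    from pystencils import create_data_handling

    dh = create_data_handling(domain_size=(64, 64),
                              periodicity=(True, True),
                              target='cuda')   # was default_target='gpu'
    f = dh.add_array('f', values_per_cell=1)   # acc defaults on, since target is not 'cpu'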
......@@ -16,8 +16,7 @@ class DataHandling(ABC):
'gather' function that collects (parts of) the distributed data on a single process.
"""
_GPU_LIKE_TARGETS = ['gpu', 'opencl']
_GPU_LIKE_BACKENDS = ['gpucuda', 'opencl']
ACCELERATOR_TARGETS = ['cuda', 'opencl', 'llvm_gpu']
# ---------------------------- Adding and accessing data -----------------------------------------------------------
......@@ -39,7 +38,7 @@ class DataHandling(ABC):
@abstractmethod
def add_array(self, name: str, values_per_cell, dtype=np.float64,
latex_name: Optional[str] = None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
cpu: bool = True, gpu: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
cpu: bool = True, acc: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary
......@@ -56,8 +55,11 @@ class DataHandling(ABC):
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
acc: allocate the field on an accelerator; if None, an accelerator version is allocated
only if the target selected when creating the data handling is not 'cpu'
alignment: either False for no alignment, or the number of bytes to align to
field_type: change from generic to staggered or absolutely accessed fields, see field documentation
Returns:
pystencils field, that can be used to formulate symbolic kernels
"""
......@@ -67,7 +69,7 @@ class DataHandling(ABC):
"""Returns true if a field or custom data element with this name was added."""
@abstractmethod
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, acc=None):
"""
Adds an array with the same parameters (number of ghost layers, values_per_cell, dtype) as existing array.
......@@ -76,33 +78,33 @@ class DataHandling(ABC):
name_of_template_field: name of array that is used as template
latex_name: see 'add' method
cpu: see 'add' method
gpu: see 'add' method
acc: see 'add' method
"""
@abstractmethod
def add_custom_data(self, name: str, cpu_creation_function,
gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
acc_creation_function=None, cpu_to_acc_transfer_func=None, acc_to_cpu_transfer_func=None):
"""Adds custom (non-array) data to domain.
Args:
name: name to access data
cpu_creation_function: function returning a new instance of the data that should be stored
gpu_creation_function: optional, function returning a new instance, stored on GPU
cpu_to_gpu_transfer_func: function that transfers cpu to gpu version,
getting two parameters (gpu_instance, cpu_instance)
gpu_to_cpu_transfer_func: function that transfers gpu to cpu version, getting two parameters
(gpu_instance, cpu_instance)
acc_creation_function: optional, function returning a new instance, stored on the accelerator
cpu_to_acc_transfer_func: function that transfers cpu to accelerator version,
getting two parameters (acc_instance, cpu_instance)
acc_to_cpu_transfer_func: function that transfers accelerator to cpu version, getting two parameters
(acc_instance, cpu_instance)
"""
def add_custom_class(self, name: str, class_obj, cpu: bool = True, gpu: bool = False):
"""Adds non-array data by passing a class object with optional 'to_gpu' and 'to_cpu' member functions."""
cpu_to_gpu_transfer_func = class_obj.to_gpu if cpu and gpu and hasattr(class_obj, 'to_gpu') else None
gpu_to_cpu_transfer_func = class_obj.to_cpu if cpu and gpu and hasattr(class_obj, 'to_cpu') else None
def add_custom_class(self, name: str, class_obj, cpu: bool = True, acc: bool = False):
"""Adds non-array data by passing a class object with optional 'to_acc' and 'to_cpu' member functions."""
cpu_to_acc_transfer_func = class_obj.to_acc if cpu and acc and hasattr(class_obj, 'to_acc') else None
acc_to_cpu_transfer_func = class_obj.to_cpu if cpu and acc and hasattr(class_obj, 'to_cpu') else None
self.add_custom_data(name,
cpu_creation_function=class_obj if cpu else None,
gpu_creation_function=class_obj if gpu else None,
cpu_to_gpu_transfer_func=cpu_to_gpu_transfer_func,
gpu_to_cpu_transfer_func=gpu_to_cpu_transfer_func)
acc_creation_function=class_obj if acc else None,
cpu_to_acc_transfer_func=cpu_to_acc_transfer_func,
acc_to_cpu_transfer_func=acc_to_cpu_transfer_func)
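
A hedged sketch of a class usable with add_custom_class; the class itself is hypothetical, only the to_acc/to_cpu hook names come from the method above:

    import numpy as np

    class ParticleData:
        def __init__(self):
            self.positions = np.zeros((100, 3))

        @staticmethod
        def to_acc(acc_inst, cpu_inst):  # picked up as cpu_to_acc_transfer_func
            pass                         # copy cpu_inst.positions to the device here

        @staticmethod
        def to_cpu(acc_inst, cpu_inst):  # picked up as acc_to_cpu_transfer_func
            pass                         # copy device data back here

    dh.add_custom_class('particles', ParticleData, cpu=True, acc=True)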
@property
@abstractmethod
......@@ -128,7 +130,7 @@ class DataHandling(ABC):
"""Returns values_per_cell of array."""
@abstractmethod
def iterate(self, slice_obj=None, gpu=False, ghost_layers=None,
def iterate(self, slice_obj=None, acc=False, ghost_layers=None,
inner_ghost_layers=True) -> Iterable['Block']:
"""Iterate over local part of potentially distributed data structure."""
......@@ -157,32 +159,32 @@ class DataHandling(ABC):
"""
@abstractmethod
def swap(self, name1, name2, gpu=False):
def swap(self, name1, name2, acc=False):
"""Swaps data of two arrays"""
# ------------------------------- CPU/GPU transfer -----------------------------------------------------------------
# ------------------------------- CPU/ACC transfer -----------------------------------------------------------------
@abstractmethod
def to_cpu(self, name):
"""Copies GPU data of array with specified name to CPU.
Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
"""Copies accelerator data of array with specified name to CPU.
Works only if 'cpu=True' and 'acc=True' have been used in the 'add' method."""
@abstractmethod
def to_gpu(self, name):
"""Copies GPU data of array with specified name to GPU.
Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
def to_acc(self, name):
"""Copies accelerator data of array with specified name to accelerator.
Works only if 'cpu=True' and 'acc=True' has been used in 'add' method."""
@abstractmethod
def all_to_cpu(self):
"""Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation."""
"""Copies data from accelerator to CPU for all arrays that have a CPU and an accelerator representation."""
@abstractmethod
def all_to_gpu(self):
"""Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation."""
def all_to_acc(self):
"""Copies data from CPU to accelerator for all arrays that have a CPU and a accelerator representation."""
@abstractmethod
def is_on_gpu(self, name):
"""Checks if this data was also allocated on the GPU - does not check if this data item is in synced."""
def is_on_acc(self, name):
"""Checks if this data was also allocated on the accelerator - does not check if this data item is in synced."""
@abstractmethod
def create_vtk_writer(self, file_name, data_names, ghost_layers=False) -> Callable[[int], None]:
......@@ -216,7 +218,7 @@ class DataHandling(ABC):
# ------------------------------- Communication --------------------------------------------------------------------
@abstractmethod
def synchronization_function(self, names, stencil=None, target=None, **kwargs) -> Callable[[], None]:
def synchronization_function(self, names, stencil=None, acc=None, **kwargs) -> Callable[[], None]:
"""Synchronizes ghost layers for distributed arrays.
For serial scenario this has to be called for correct periodicity handling
......@@ -225,8 +227,9 @@ class DataHandling(ABC):
names: what data to synchronize: name of array or sequence of names
stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
target: either 'cpu' or 'gpu'
kwargs: implementation specific, optional optimization parameters for communication
acc: synchronize the accelerator copy of the data; if None, the accelerator is used whenever
the target chosen at construction is an accelerator target
kwargs: implementation specific
Returns:
function object to run the communication
......
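
A hedged usage sketch of the synchronization function documented above, reusing the dh and array from the earlier sketch:

    sync = dh.synchronization_function(['c'], stencil='D2Q9')  # acc=None: inferred from target
    sync()  # runs the ghost-layer exchange and periodicity handling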
......@@ -16,7 +16,7 @@ class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX = "gpu_"
VTK_COUNTER = 0
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target='cpu'):
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, target='cpu'):
"""
Creates data handling based on walberla block storage
......@@ -27,8 +27,7 @@ class ParallelDataHandling(DataHandling):
dim: dimension of scenario,
walberla always uses three dimensions, so if dim=2 the extent of the
z coordinate of blocks has to be 1
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
if not overwritten in add_array, and synchronization functions are for the GPU by default
target: either 'cpu' or 'cuda'; other targets are not supported in the parallel setup
"""
super(ParallelDataHandling, self).__init__()
assert dim in (2, 3)
......@@ -52,7 +51,8 @@ class ParallelDataHandling(DataHandling):
if self._dim == 2:
assert self.blocks.getDomainCellBB().size[2] == 1
self.default_target = default_target
assert target in ('cpu', 'cuda'), "ParallelDataHandling only supports 'cpu' and 'cuda' targets"
self.target = target
@property
def dim(self):
......@@ -77,24 +77,24 @@ class ParallelDataHandling(DataHandling):
return self._fieldInformation[name]['values_per_cell']
def add_custom_data(self, name, cpu_creation_function,
gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
if cpu_creation_function and gpu_creation_function:
if cpu_to_gpu_transfer_func is None or gpu_to_cpu_transfer_func is None:
acc_creation_function=None, cpu_to_acc_transfer_func=None, acc_to_cpu_transfer_func=None):
if cpu_creation_function and acc_creation_function:
if cpu_to_acc_transfer_func is None or acc_to_cpu_transfer_func is None:
raise ValueError("For GPU data, both transfer functions have to be specified")
self._custom_data_transfer_functions[name] = (cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
self._custom_data_transfer_functions[name] = (cpu_to_acc_transfer_func, acc_to_cpu_transfer_func)
if cpu_creation_function:
self.blocks.addBlockData(name, cpu_creation_function)
if gpu_creation_function:
self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpu_creation_function)
if acc_creation_function:
self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, acc_creation_function)
self._custom_data_names.append(name)
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None,
layout=None, cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
layout=None, cpu=True, acc=None, alignment=False, field_type=FieldType.GENERIC):
if ghost_layers is None:
ghost_layers = self.default_ghost_layers
if gpu is None:
gpu = self.default_target == 'gpu'
if acc is None:
acc = self.target == 'cuda'
if layout is None:
layout = self.default_layout
if len(self.blocks) == 0:
......@@ -122,13 +122,13 @@ class ParallelDataHandling(DataHandling):
if cpu:
wlb.field.addToStorage(self.blocks, name, dtype, fSize=values_per_cell, layout=layout_map[layout],
ghostLayers=ghost_layers, alignment=alignment)
if gpu:
if acc:
if alignment != 0:
raise ValueError("Alignment for walberla GPU fields not yet supported")
wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell,
usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout])
if cpu and gpu:
if cpu and acc:
self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name))
block_bb = self.blocks.getBlockCellBB(self.blocks[0])
......@@ -144,7 +144,7 @@ class ParallelDataHandling(DataHandling):
field_type=field_type)
self.fields[name].latex_name = latex_name
self._field_name_to_cpu_data_name[name] = name
if gpu:
if acc:
self._field_name_to_gpu_data_name[name] = self.GPU_DATA_PREFIX + name
return self.fields[name]
......@@ -159,18 +159,18 @@ class ParallelDataHandling(DataHandling):
def custom_data_names(self):
return tuple(self._custom_data_names)
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
return self.add_array(name, latex_name=latex_name, cpu=cpu, gpu=gpu,
def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, acc=None):
return self.add_array(name, latex_name=latex_name, cpu=cpu, acc=acc,
**self._fieldInformation[name_of_template_field])
def swap(self, name1, name2, gpu=False):
if gpu:
def swap(self, name1, name2, acc=False):
if acc:
name1 = self.GPU_DATA_PREFIX + name1
name2 = self.GPU_DATA_PREFIX + name2
for block in self.blocks:
block[name1].swapDataPointers(block[name2])
def iterate(self, slice_obj=None, gpu=False, ghost_layers=True, inner_ghost_layers=True):
def iterate(self, slice_obj=None, acc=False, ghost_layers=True, inner_ghost_layers=True):
if ghost_layers is True:
ghost_layers = self.default_ghost_layers
elif ghost_layers is False:
......@@ -185,7 +185,7 @@ class ParallelDataHandling(DataHandling):
elif isinstance(ghost_layers, str):
ghost_layers = self.ghost_layers_of_field(ghost_layers)
prefix = self.GPU_DATA_PREFIX if gpu else ""
prefix = self.GPU_DATA_PREFIX if acc else ""
if slice_obj is not None:
yield from sliced_block_iteration(self.blocks, slice_obj, inner_ghost_layers, ghost_layers,
self.dim, prefix)
......@@ -229,7 +229,8 @@ class ParallelDataHandling(DataHandling):
kernel_function(**arg_dict)
def get_kernel_kwargs(self, kernel_function, **kwargs):
if kernel_function.ast.backend == 'gpucuda':
if kernel_function.ast.target in self.ACCELERATOR_TARGETS:
assert kernel_function.ast.target == 'cuda', 'ParallelDataHandling only supports CUDA and CPU'
name_map = self._field_name_to_gpu_data_name
to_array = wlb.cuda.toGpuArray
else:
......@@ -258,7 +259,7 @@ class ParallelDataHandling(DataHandling):
else:
wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def to_gpu(self, name):
def to_acc(self, name):
if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0]
for block in self.blocks:
......@@ -266,7 +267,7 @@ class ParallelDataHandling(DataHandling):
else:
wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def is_on_gpu(self, name):
def is_on_acc(self, name):
return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs
def all_to_cpu(self):
......@@ -275,21 +276,15 @@ class ParallelDataHandling(DataHandling):
for name in self._custom_data_transfer_functions.keys():
self.to_cpu(name)
def all_to_gpu(self):
def all_to_acc(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.cuda.copyFieldToGpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys():
self.to_gpu(name)
self.to_acc(name)
def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'cpu', buffered, stencil_restricted)
def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'gpu', buffered, stencil_restricted)
def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False):
if target is None:
target = self.default_target
def synchronization_function(self, names, stencil=None, acc=None, buffered=True, stencil_restricted=False):
if acc is None:
acc = self.target == 'cuda'
if stencil is None:
stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
......@@ -298,12 +293,12 @@ class ParallelDataHandling(DataHandling):
names = [names]
create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
if target == 'cpu':
if not acc:
create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
if not buffered and stencil_restricted:
create_packing = wlb.field.createStencilRestrictedPackInfo
else:
assert target == 'gpu'
assert self.target == 'cuda'
create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names]
......
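
A hedged parallel-setup sketch (requires the walberla Python module; per the assertion above, 'cuda' is the only accelerator target here):

    dh = create_data_handling((64, 64, 64), parallel=True, target='cuda')
    dh.add_array('phi', acc=True)
    sync = dh.synchronization_function(['phi'], acc=True)  # uses the wlb.cuda pack infos
    sync()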
......@@ -25,7 +25,7 @@ class PyCudaArrayHandler:
else:
return gpuarray.empty(shape, dtype)
def to_gpu(self, array):
def to_acc(self, array):
return gpuarray.to_gpu(array)
def upload(self, gpuarray, numpy_array):
......
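
A hedged sketch of the renamed array-handler entry point (requires pycuda; the import path is an assumption):

    import numpy as np
    from pystencils.datahandling.pycuda import PyCudaArrayHandler  # path assumed

    handler = PyCudaArrayHandler()
    device_arr = handler.to_acc(np.ones((8, 8)))  # was handler.to_gpu(...)
    host_copy = device_arr.get()                  # round trip back to a numpy array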
......@@ -10,7 +10,7 @@ import pystencils
class PyOpenClArrayHandler:
def __init__(self, queue):
def __init__(self, queue=None):
if not queue:
from pystencils.opencl.opencljit import get_global_cl_queue
queue = get_global_cl_queue()
......@@ -31,7 +31,7 @@ class PyOpenClArrayHandler:
else:
return gpuarray.empty(self.queue, shape, dtype)
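
With the new default above, the queue argument may be omitted; a hedged sketch (import path assumed; a global OpenCL queue must have been set up beforehand, e.g. via pystencils.opencl.opencljit):

    from pystencils.datahandling.pyopencl import PyOpenClArrayHandler  # path assumed

    handler = PyOpenClArrayHandler()  # queue=None -> falls back to get_global_cl_queue()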