Commit 20d9c9d4 authored by Markus Holzer's avatar Markus Holzer
Browse files

Removed OpenCL

parent e7b9976b
...@@ -4,3 +4,4 @@ ...@@ -4,3 +4,4 @@
### Removed ### Removed
* LLVM backend because it was not used much and not well integrated in pystencils. * LLVM backend because it was not used much and not well integrated in pystencils.
* OpenCL backend because it was not used much and not well integrated in pystencils.
...@@ -53,7 +53,6 @@ Without `[interactive]` you get a minimal version with very little dependencies. ...@@ -53,7 +53,6 @@ Without `[interactive]` you get a minimal version with very little dependencies.
All options: All options:
- `gpu`: use this if an NVIDIA GPU is available and CUDA is installed - `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
- `opencl`: basic OpenCL support (experimental)
- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl - `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
- `bench_db`: functionality to store benchmark result in object databases - `bench_db`: functionality to store benchmark result in object databases
- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc. - `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
......
...@@ -47,7 +47,7 @@ def generate_c(ast_node: Node, ...@@ -47,7 +47,7 @@ def generate_c(ast_node: Node,
Args: Args:
ast_node: ast representation of kernel ast_node: ast representation of kernel
signature_only: generate signature without function body signature_only: generate signature without function body
dialect: `Backend`: 'C', 'CUDA' or 'OPENCL' dialect: `Backend`: 'C' or 'CUDA'
custom_backend: use own custom printer for code generation custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables with_globals: enable usage of global variables
Returns: Returns:
...@@ -71,9 +71,6 @@ def generate_c(ast_node: Node, ...@@ -71,9 +71,6 @@ def generate_c(ast_node: Node,
elif dialect == Backend.CUDA: elif dialect == Backend.CUDA:
from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only) printer = CudaBackend(signature_only=signature_only)
elif dialect == Backend.OPENCL:
from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenClBackend(signature_only=signature_only)
else: else:
raise ValueError(f'Unknown {dialect=}') raise ValueError(f'Unknown {dialect=}')
code = printer(ast_node) code = printer(ast_node)
......
acos
acosh
acospi
asin
asinh
asinpi
atan
atan2
atanh
atanpi
atan2pi
cbrt
ceil
copysign
cos
cosh
cospi
erfc
erf
exp
exp2
exp10
expm1
fabs
fdim
floor
fma
fmax
fmax
fmin45
fmin
fmod
fract
frexp
hypot
ilogb
ldexp
lgamma
lgamma_r
log
log2
log10
log1p
logb
mad
maxmag
minmag
modf
nextafter
pow
pown
powr
remquo
intn
remquo
rint
rootn
rootn
round
rsqrt
sin
sincos
sinh
sinpi
sqrt
tan
tanh
tanpi
tgamma
trunc
half_cos
half_divide
half_exp
half_exp2
half_exp10
half_log
half_log2
half_log10
half_powr
half_recip
half_rsqrt
half_sin
half_sqrt
half_tan
native_cos
native_divide
native_exp
native_exp2
native_exp10
native_log
native_log2
native_log10
native_powr
native_recip
native_rsqrt
native_sin
native_sqrt
native_tan
from os.path import dirname, join
import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
# Read the function names listed in 'opencl1.1_known_functions.txt', which is
# located in the same directory as this module (one name per line).
with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
    lines = f.readlines()

# Map every listed function name to itself.
# The filter is applied to the *stripped* name: raw lines returned by readlines()
# still end with their newline, so testing the unstripped line never drops blank
# lines and would register an empty function name ''.
OPENCL_KNOWN_FUNCTIONS = {name: name for name in (line.strip() for line in lines) if name}
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
    """Generate OpenCL code for an abstract syntax tree node (made for `Target` 'GPU').

    Convenience wrapper around `generate_c` that fixes the dialect to `Backend.OPENCL`.

    Args:
        ast_node: ast representation of kernel
        signature_only: generate signature without function body
        custom_backend: use own custom printer for code generation
        with_globals: enable usage of global variables

    Returns:
        OpenCL code for the ast node and its descendants
    """
    # TODO: should the description above refer to Backend instead of Target?
    forwarded_options = dict(dialect=Backend.OPENCL,
                             custom_backend=custom_backend,
                             with_globals=with_globals)
    return generate_c(ast_node, signature_only, **forwarded_options)
class OpenClBackend(CudaBackend):
    """Code printer for the OpenCL dialect, derived from the CUDA backend."""

    def __init__(self,
                 sympy_printer=None,
                 signature_only=False):
        """Set up the backend.

        Args:
            sympy_printer: expression printer to use; any falsy value is replaced
                           by a fresh `OpenClSympyPrinter`
            signature_only: forwarded unchanged to the `CudaBackend` constructor
        """
        expression_printer = sympy_printer or OpenClSympyPrinter()
        super().__init__(expression_printer, signature_only)
        self._dialect = Backend.OPENCL

    def _print_Type(self, node):
        """Print a type as the parent class does, prefixing pointer types with ``__global``."""
        printed_type = super()._print_Type(node)
        is_pointer = isinstance(node, pystencils.data_types.PointerType)
        return ("__global " + printed_type) if is_pointer else printed_type

    def _print_ThreadBlockSynchronization(self, node):
        """Not available in this backend: always raises `NotImplementedError`."""
        raise NotImplementedError()

    def _print_TextureDeclaration(self, node):
        """Not available in this backend: always raises `NotImplementedError`."""
        raise NotImplementedError()
class OpenClSympyPrinter(CudaSympyPrinter):
    """Expression printer for the OpenCL dialect.

    Derives from the CUDA expression printer but prints math functions and powers
    with the `CustomSympyPrinter` implementations, and translates CUDA thread
    indexing symbols into OpenCL work-item function calls.
    """
    language = "OpenCL"

    # Component of a thread indexing symbol (the part after the dot) -> argument
    # printed in the OpenCL work-item function call.
    DIMENSION_MAPPING = {
        'x': '0',
        'y': '1',
        'z': '2'
    }

    # CUDA indexing variable (the part before the dot) -> OpenCL function name.
    # NOTE(review): CUDA's `gridDim` is the number of blocks, which corresponds to
    # OpenCL's `get_num_groups`; `get_global_size` is the total number of work-items.
    # Left unchanged here - confirm which one the callers expect.
    INDEXING_FUNCTION_MAPPING = {
        'blockIdx': 'get_group_id',
        'threadIdx': 'get_local_id',
        'blockDim': 'get_local_size',
        'gridDim': 'get_global_size'
    }

    def __init__(self):
        # Calls the CustomSympyPrinter initializer directly instead of super().__init__(),
        # i.e. CudaSympyPrinter.__init__ is bypassed - presumably on purpose, TODO confirm.
        CustomSympyPrinter.__init__(self)
        # The module-level dictionary is used directly (no copy is made).
        self.known_functions = OPENCL_KNOWN_FUNCTIONS

    def _print_Type(self, node):
        """Print a type as the parent class does, prefixing pointer types with ``__global``."""
        code = super()._print_Type(node)
        if isinstance(node, pystencils.data_types.PointerType):
            return "__global " + code
        else:
            return code

    def _print_ThreadIndexingSymbol(self, node):
        """Print a symbol named ``<variable>.<dimension>`` (e.g. ``blockIdx.x``) as an OpenCL call.

        Raises:
            ValueError: if the symbol name does not consist of exactly two dot-separated parts
            KeyError: if the variable or the dimension is not contained in the mapping tables
        """
        symbol_name: str = node.name
        function_name, dimension = tuple(symbol_name.split("."))
        dimension = self.DIMENSION_MAPPING[dimension]
        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
        return f"(int64_t) {function_name}({dimension})"

    def _print_TextureAccess(self, node):
        """Not available in this printer: always raises `NotImplementedError`."""
        raise NotImplementedError()

    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
    # since built-in math functions are generic.
    # In CUDA, you have to differentiate between `sin` and `sinf`
    try:
        _print_math_func = CustomSympyPrinter._print_math_func
    except AttributeError:
        # CustomSympyPrinter does not provide `_print_math_func` in this environment;
        # the implementation inherited from the parent class stays in effect.
        pass
    _print_Pow = CustomSympyPrinter._print_Pow

    def _print_Function(self, expr):
        """Print the fast approximation functions as OpenCL ``native_*`` calls.

        Every other function is delegated to `CustomSympyPrinter._print_Function`.
        """
        if isinstance(expr, fast_division):
            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_sqrt):
            # %-formatting with the argument tuple inserts the printed argument itself.
            # Interpolating the tuple into an f-string would emit its repr, e.g. "('x',)".
            return "native_sqrt(%s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_inv_sqrt):
            return "native_rsqrt(%s)" % tuple(self._print(a) for a in expr.args)
        return CustomSympyPrinter._print_Function(self, expr)
...@@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_layout: str = 'SoA', default_layout: str = 'SoA',
default_target: Target = Target.CPU, default_target: Target = Target.CPU,
parallel: bool = False, parallel: bool = False,
default_ghost_layers: int = 1, default_ghost_layers: int = 1) -> DataHandling:
opencl_queue=None) -> DataHandling:
"""Creates a data handling instance. """Creates a data handling instance.
Args: Args:
...@@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_target = new_target default_target = new_target
if parallel: if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None: if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available") raise ValueError("Cannot create parallel data handling because walberla module is not available")
...@@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity=periodicity, periodicity=periodicity,
default_target=default_target, default_target=default_target,
default_layout=default_layout, default_layout=default_layout,
default_ghost_layers=default_ghost_layers, default_ghost_layers=default_ghost_layers)
opencl_queue=opencl_queue)
__all__ = ['create_data_handling'] __all__ = ['create_data_handling']
...@@ -17,8 +17,8 @@ class DataHandling(ABC): ...@@ -17,8 +17,8 @@ class DataHandling(ABC):
'gather' function that collects (parts of the) distributed data on a single process. 'gather' function that collects (parts of the) distributed data on a single process.
""" """
_GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL] _GPU_LIKE_TARGETS = [Target.GPU]
_GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL] _GPU_LIKE_BACKENDS = [Backend.CUDA]
# ---------------------------- Adding and accessing data ----------------------------------------------------------- # ---------------------------- Adding and accessing data -----------------------------------------------------------
@property @property
......
try:
import pyopencl.array as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyOpenClArrayHandler:
    """Creates device arrays and copies data between host and device via `pyopencl.array`.

    All operations use the command queue stored in `self.queue`.
    """

    def __init__(self, queue):
        """Store the queue to work with.

        Args:
            queue: queue used for all array operations; if falsy, the queue returned by
                   `pystencils.opencl.opencljit.get_global_cl_queue` is used instead
        """
        active_queue = queue
        if not active_queue:
            # Only imported when no queue was handed in.
            from pystencils.opencl.opencljit import get_global_cl_queue
            active_queue = get_global_cl_queue()
        assert active_queue, ("OpenCL queue missing!\n"
                              "Use `import pystencils.opencl.autoinit` if you want it to be automatically created")
        self.queue = active_queue

    def to_gpu(self, array):
        """Copy `array` to the device and return the device array."""
        return gpuarray.to_device(self.queue, array)

    # Alias kept so that both names can be used interchangeably.
    from_numpy = to_gpu

    def zeros(self, shape, dtype=np.float64, order='C'):
        """Return a device array of zeros; it is created with numpy and then transferred."""
        return self.to_gpu(np.zeros(shape=shape, dtype=dtype, order=order))

    def ones(self, shape, dtype=np.float64, order='C'):
        """Return a device array of ones; it is created with numpy and then transferred."""
        return self.to_gpu(np.ones(shape=shape, dtype=dtype, order=order))

    def empty(self, shape, dtype=np.float64, layout=None):
        """Return an uninitialized device array.

        Without a layout the array is allocated directly on the device. With a layout, a
        numpy array with that layout is created first and then transferred.
        """
        if not layout:
            return gpuarray.empty(self.queue, shape, dtype)
        host_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
        return self.to_gpu(host_array)

    def upload(self, gpuarray, numpy_array):
        """Copy the content of `numpy_array` into the existing device array `gpuarray`."""
        gpuarray.set(numpy_array, self.queue)

    def download(self, gpuarray, numpy_array):
        """Copy the content of the device array `gpuarray` into `numpy_array`."""
        gpuarray.get(self.queue, numpy_array)

    def randn(self, shape, dtype=np.float64):
        """Return a device array filled with samples drawn by `np.random.randn`, cast to `dtype`."""
        samples = np.random.randn(*shape)
        return self.from_numpy(samples.astype(dtype))
...@@ -7,7 +7,6 @@ import numpy as np ...@@ -7,7 +7,6 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.enums import Target from pystencils.enums import Target
from pystencils.field import ( from pystencils.field import (
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple, Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
...@@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling): ...@@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling):
default_layout: str = 'SoA', default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False, periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU, default_target: Target = Target.CPU,
opencl_queue=None,
opencl_ctx=None,
array_handler=None) -> None: array_handler=None) -> None:
""" """
Creates a data handling for single node simulations. Creates a data handling for single node simulations.
...@@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling): ...@@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling):
self.custom_data_cpu = DotDict() self.custom_data_cpu = DotDict()
self.custom_data_gpu = DotDict() self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {} self._custom_data_transfer_functions = {}
self._opencl_queue = opencl_queue
self._opencl_ctx = opencl_ctx
if not array_handler: if not array_handler:
try: try:
self.array_handler = PyCudaArrayHandler() self.array_handler = PyCudaArrayHandler()
except Exception: except Exception:
self.array_handler = PyCudaNotAvailableHandler() self.array_handler = PyCudaNotAvailableHandler()
if default_target == Target.OPENCL or opencl_queue:
self.array_handler = PyOpenClArrayHandler(opencl_queue)
else: else:
self.array_handler = array_handler self.array_handler = array_handler
...@@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling): ...@@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling):
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_): def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None: if target is None:
target = self.default_target target = self.default_target
if target == Target.OPENCL: # TODO potential misuse between Target and Backend
target = Target.GPU
assert target in (Target.CPU, Target.GPU) assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str: if not hasattr(names, '__len__') or type(names) is str:
names = [names] names = [names]
...@@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling): ...@@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling):
else: else:
if functor is None: if functor is None:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor
target = Target.GPU if not isinstance(self.array_handler, target = Target.GPU
PyOpenClArrayHandler) else Target.OPENCL
result.append(functor(filtered_stencil, self._domainSize, result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions, index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell, index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype, dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls, ghost_layers=gls,
target=target, target=target))
opencl_queue=self._opencl_queue,
opencl_ctx=self._opencl_ctx))
if target == Target.CPU: if target == Target.CPU:
def result_functor(): def result_functor():
......
...@@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None) ...@@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None)
if isinstance(ast, KernelWrapper): if isinstance(ast, KernelWrapper):
ast = ast.ast ast = ast.ast
if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}: if ast.backend not in {Backend.C, Backend.CUDA}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}') raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend dialect = ast.backend
......
...@@ -13,10 +13,6 @@ class Target(Enum): ...@@ -13,10 +13,6 @@ class Target(Enum):
""" """
Target GPU architecture. Target GPU architecture.
""" """
OPENCL = auto()
"""
Target all architectures OpenCL covers (Thus both, Target and Backend)
"""
class Backend(Enum): class Backend(Enum):
...@@ -32,7 +28,3 @@ class Backend(Enum): ...@@ -32,7 +28,3 @@ class Backend(Enum):
""" """
Use the CUDA backend to generate code for NVIDIA GPUs. Use the CUDA backend to generate code for NVIDIA GPUs.
""" """
OPENCL = auto()
"""
Use the OpenCL backend to generate code for OpenCL.
"""
...@@ -2,7 +2,6 @@ import numpy as np ...@@ -2,7 +2,6 @@ import numpy as np
from itertools import product from itertools import product
import pystencils.gpucuda import pystencils.gpucuda
import pystencils.opencl
from pystencils import Assignment, Field from pystencils import Assignment, Field
from pystencils.gpucuda.kernelcreation import create_cuda_kernel from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils.enums import Target from pystencils.enums import Target
...@@ -33,19 +32,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in ...@@ -33,19 +32,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
# TODO: type float is dangerous here # TODO: type float is dangerous here
def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1, def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
thickness=None, dtype=float, target=Target.GPU, opencl_queue=None, opencl_ctx=None): thickness=None, dtype=float, target=Target.GPU):
assert target in {Target.GPU, Target.OPENCL} assert target in {Target.GPU}
src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness) src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
kernels = [] kernels = []
for src_slice, dst_slice in src_dst_slice_tuples: for src_slice, dst_slice in src_dst_slice_tuples:
ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype) ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)
if target == pystencils.Target.GPU: kernels.append(pystencils.gpucuda.make_python_function(ast))
kernels.append(pystencils.gpucuda.make_python_function(ast))
else:
ast._target = pystencils.Target.OPENCL
ast._backend = pystencils.Backend.OPENCL
kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx))
def functor(pdfs, **_): def functor(pdfs, **_):
for kernel in kernels: for kernel in kernels:
......
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from typing import Union
import numpy as np
try:
import pycuda.driver as cuda
from pycuda import gpuarray
import pycuda
except Exception:
pass
def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceReference]
ndarray,
address_mode=None,
filter_mode=None,
use_normalized_coordinates=False,
read_as_integer=False):
if isinstance(address_mode, str):
address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
if address_mode is None:
address_mode = cuda.address_mode.BORDER
if filter_mode is None:
filter_mode = cuda.filter_mode.LINEAR
if isinstance(ndarray, np.ndarray):
cu_array = cuda.np_to_array(ndarray, 'C')
elif isinstance(ndarray, gpuarray.GPUArray):
cu_array = cuda.gpuarray_to_array(ndarray, 'C')
else:
raise TypeError(
'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')
tex_ref.set_array(cu_array)
tex_ref.set_address_mode(0, address_mode)
if ndarray.ndim >= 2:
tex_ref.set_address_mode(1, address_mode)
if ndarray.ndim >= 3:
tex_ref.set_address_mode(2, address_mode)
tex_ref.set_filter_mode(filter_mode)
if not use_normalized_coordinates: