Commit 30da6576 authored by Jan Hönig's avatar Jan Hönig
Browse files

Merge branch 'RemoveOpenCL' into 'master'

Removed OpenCL

See merge request pycodegen/pystencils!278
parents 0ed1a87b 9afc38bb
Pipeline #35861 passed with stages
in 24 minutes and 13 seconds
......@@ -4,3 +4,4 @@
### Removed
* LLVM backend because it was not used much and was not well integrated into pystencils.
* OpenCL backend because it was not used much and was not well integrated into pystencils.
......@@ -53,7 +53,6 @@ Without `[interactive]` you get a minimal version with very little dependencies.
All options:
- `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
- `opencl`: basic OpenCL support (experimental)
- `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
- `bench_db`: functionality to store benchmark results in object databases
- `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
......
......@@ -47,7 +47,7 @@ def generate_c(ast_node: Node,
Args:
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: `Backend`: 'C', 'CUDA' or 'OPENCL'
dialect: `Backend`: 'C' or 'CUDA'
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
......@@ -71,9 +71,6 @@ def generate_c(ast_node: Node,
elif dialect == Backend.CUDA:
from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only)
elif dialect == Backend.OPENCL:
from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenClBackend(signature_only=signature_only)
else:
raise ValueError(f'Unknown {dialect=}')
code = printer(ast_node)
......
acos
acosh
acospi
asin
asinh
asinpi
atan
atan2
atanh
atanpi
atan2pi
cbrt
ceil
copysign
cos
cosh
cospi
erfc
erf
exp
exp2
exp10
expm1
fabs
fdim
floor
fma
fmax
fmax
fmin45
fmin
fmod
fract
frexp
hypot
ilogb
ldexp
lgamma
lgamma_r
log
log2
log10
log1p
logb
mad
maxmag
minmag
modf
nextafter
pow
pown
powr
remquo
intn
remquo
rint
rootn
rootn
round
rsqrt
sin
sincos
sinh
sinpi
sqrt
tan
tanh
tanpi
tgamma
trunc
half_cos
half_divide
half_exp
half_exp2
half_exp10
half_log
half_log2
half_log10
half_powr
half_recip
half_rsqrt
half_sin
half_sqrt
half_tan
native_cos
native_divide
native_exp
native_exp2
native_exp10
native_log
native_log2
native_log10
native_powr
native_recip
native_rsqrt
native_sin
native_sqrt
native_tan
from os.path import dirname, join
import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.enums import Backend
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
# Read the function names listed in 'opencl1.1_known_functions.txt', which is
# expected to sit in the same directory as this module.
with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
    lines = f.readlines()

# Map every listed name to itself. The name is stripped *before* it is tested:
# each entry of `lines` still carries its trailing newline, so testing the raw
# line would never filter anything and a blank line would end up as an
# empty-string key.
OPENCL_KNOWN_FUNCTIONS = {name: name for name in (line.strip() for line in lines) if name}
def generate_opencl(ast_node: Node, signature_only: bool = False, custom_backend=None, with_globals=True) -> str:
    """Render an abstract syntax tree node as OpenCL code.

    This is a thin wrapper that forwards to `generate_c` with the dialect fixed to
    `Backend.OPENCL`. The node is expected to have been made for `Target` 'GPU'.
    TODO: should this refer to Backend instead of Target?

    Args:
        ast_node: ast representation of the kernel
        signature_only: generate only the signature, without the function body
        custom_backend: own custom printer to use for code generation
        with_globals: enable usage of global variables

    Returns:
        OpenCL code for the ast node and its descendants
    """
    opencl_code = generate_c(ast_node,
                             signature_only,
                             dialect=Backend.OPENCL,
                             custom_backend=custom_backend,
                             with_globals=with_globals)
    return opencl_code
class OpenClBackend(CudaBackend):
    """Code printer for the OpenCL dialect, derived from `CudaBackend`."""

    def __init__(self,
                 sympy_printer=None,
                 signature_only=False):
        """Set up the backend.

        Args:
            sympy_printer: expression printer to use; any falsy value is replaced
                           by a fresh `OpenClSympyPrinter`
            signature_only: forwarded unchanged to the parent constructor
        """
        printer = sympy_printer if sympy_printer else OpenClSympyPrinter()
        super().__init__(printer, signature_only)
        self._dialect = Backend.OPENCL

    def _print_Type(self, node):
        """Print a type like the parent class, prefixing pointer types with ``__global``."""
        printed = super()._print_Type(node)
        is_pointer = isinstance(node, pystencils.data_types.PointerType)
        return ("__global " + printed) if is_pointer else printed

    def _print_ThreadBlockSynchronization(self, node):
        """Thread block synchronization is not implemented by this backend."""
        raise NotImplementedError()

    def _print_TextureDeclaration(self, node):
        """Texture declarations are not implemented by this backend."""
        raise NotImplementedError()
class OpenClSympyPrinter(CudaSympyPrinter):
    """Expression printer that emits OpenCL syntax.

    Derives from `CudaSympyPrinter` but borrows several printing routines from
    `CustomSympyPrinter` (see the class-level assignments further down).
    """

    language = "OpenCL"

    # Maps the dimension suffix of an indexing symbol ('x', 'y', 'z') to the
    # integer argument passed to the OpenCL indexing function.
    DIMENSION_MAPPING = {
        'x': '0',
        'y': '1',
        'z': '2'
    }

    # Maps the CUDA-style prefix of an indexing symbol to the OpenCL function
    # that is printed in its place.
    # NOTE(review): 'gridDim' is mapped to 'get_global_size'; confirm this is the
    # intended counterpart (as opposed to e.g. the number of work-groups).
    INDEXING_FUNCTION_MAPPING = {
        'blockIdx': 'get_group_id',
        'threadIdx': 'get_local_id',
        'blockDim': 'get_local_size',
        'gridDim': 'get_global_size'
    }

    def __init__(self):
        # Deliberately calls CustomSympyPrinter.__init__ directly instead of
        # super().__init__(), then installs the OpenCL function table.
        CustomSympyPrinter.__init__(self)
        self.known_functions = OPENCL_KNOWN_FUNCTIONS

    def _print_Type(self, node):
        """Print a type like the parent class, prefixing pointer types with ``__global``."""
        code = super()._print_Type(node)
        if isinstance(node, pystencils.data_types.PointerType):
            return "__global " + code
        else:
            return code

    def _print_ThreadIndexingSymbol(self, node):
        """Translate a symbol named ``<prefix>.<dim>`` (e.g. ``blockIdx.x``) into an OpenCL call.

        Raises:
            ValueError: if the symbol name does not consist of exactly two dot-separated parts
            KeyError: if the prefix or the dimension is not present in the mapping tables
        """
        symbol_name: str = node.name
        function_name, dimension = tuple(symbol_name.split("."))
        dimension = self.DIMENSION_MAPPING[dimension]
        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
        return f"(int64_t) {function_name}({dimension})"

    def _print_TextureAccess(self, node):
        """Texture accesses are not implemented by this printer."""
        raise NotImplementedError()

    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
    # since built-in math functions are generic.
    # In CUDA, you have to differentiate between `sin` and `sinf`
    try:
        _print_math_func = CustomSympyPrinter._print_math_func
    except AttributeError:
        # CustomSympyPrinter has no `_print_math_func`: keep whatever is inherited.
        pass
    _print_Pow = CustomSympyPrinter._print_Pow

    def _print_Function(self, expr):
        """Print fast-approximation functions as OpenCL ``native_*`` calls.

        Every other function is delegated to `CustomSympyPrinter._print_Function`.
        """
        if isinstance(expr, fast_division):
            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_sqrt):
            # %-formatting inserts the printed argument itself. Interpolating the
            # tuple into an f-string would emit its repr, e.g. "native_sqrt(('x',))".
            return "native_sqrt(%s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_inv_sqrt):
            return "native_rsqrt(%s)" % tuple(self._print(a) for a in expr.args)
        return CustomSympyPrinter._print_Function(self, expr)
......@@ -23,8 +23,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_layout: str = 'SoA',
default_target: Target = Target.CPU,
parallel: bool = False,
default_ghost_layers: int = 1,
opencl_queue=None) -> DataHandling:
default_ghost_layers: int = 1) -> DataHandling:
"""Creates a data handling instance.
Args:
......@@ -43,7 +42,6 @@ def create_data_handling(domain_size: Tuple[int, ...],
default_target = new_target
if parallel:
assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available")
......@@ -71,8 +69,7 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity=periodicity,
default_target=default_target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
opencl_queue=opencl_queue)
default_ghost_layers=default_ghost_layers)
__all__ = ['create_data_handling']
......@@ -17,8 +17,8 @@ class DataHandling(ABC):
'gather' function that collects (parts of the) distributed data on a single process.
"""
_GPU_LIKE_TARGETS = [Target.GPU, Target.OPENCL]
_GPU_LIKE_BACKENDS = [Backend.CUDA, Backend.OPENCL]
_GPU_LIKE_TARGETS = [Target.GPU]
_GPU_LIKE_BACKENDS = [Backend.CUDA]
# ---------------------------- Adding and accessing data -----------------------------------------------------------
@property
......
try:
import pyopencl.array as gpuarray
except ImportError:
gpuarray = None
import numpy as np
import pystencils
class PyOpenClArrayHandler:
    """Creates and transfers arrays through `pyopencl.array`, bound to one OpenCL queue."""

    def __init__(self, queue):
        """Store the queue to use; a falsy `queue` is replaced by the global one."""
        if queue:
            active_queue = queue
        else:
            # Imported lazily, only when no queue was handed in.
            from pystencils.opencl.opencljit import get_global_cl_queue
            active_queue = get_global_cl_queue()
        assert active_queue, ("OpenCL queue missing!\n"
                              "Use `import pystencils.opencl.autoinit` if you want it to be automatically created")
        self.queue = active_queue

    def zeros(self, shape, dtype=np.float64, order='C'):
        """Build a zero-filled numpy array and pass it through `to_gpu`."""
        return self.to_gpu(np.zeros(shape=shape, dtype=dtype, order=order))

    def ones(self, shape, dtype=np.float64, order='C'):
        """Build a one-filled numpy array and pass it through `to_gpu`."""
        return self.to_gpu(np.ones(shape=shape, dtype=dtype, order=order))

    def empty(self, shape, dtype=np.float64, layout=None):
        """Return an uninitialised array.

        Without a layout the array is allocated directly via `gpuarray.empty`.
        With a layout, a numpy array with that layout is created first and then
        passed through `to_gpu`.
        """
        if not layout:
            return gpuarray.empty(self.queue, shape, dtype)
        host_array = pystencils.field.create_numpy_array_with_layout(shape=shape, dtype=dtype, layout=layout)
        return self.to_gpu(host_array)

    def to_gpu(self, array):
        """Hand `array` to `gpuarray.to_device` using this handler's queue."""
        return gpuarray.to_device(self.queue, array)

    def upload(self, gpuarray, numpy_array):
        """Call `set` on the given array object with `numpy_array` and the queue.

        Note that the `gpuarray` parameter shadows the module of the same name.
        """
        gpuarray.set(numpy_array, self.queue)

    def download(self, gpuarray, numpy_array):
        """Call `get` on the given array object with the queue and `numpy_array`."""
        gpuarray.get(self.queue, numpy_array)

    def randn(self, shape, dtype=np.float64):
        """Draw a standard-normal numpy array, cast it to `dtype` and pass it to `from_numpy`."""
        host_array = np.random.randn(*shape).astype(dtype)
        return self.from_numpy(host_array)

    # Alias: `from_numpy` is the very same function as `to_gpu`.
    from_numpy = to_gpu
......@@ -7,7 +7,6 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.datahandling.pycuda import PyCudaArrayHandler, PyCudaNotAvailableHandler
from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
from pystencils.enums import Target
from pystencils.field import (
Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
......@@ -24,8 +23,6 @@ class SerialDataHandling(DataHandling):
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU,
opencl_queue=None,
opencl_ctx=None,
array_handler=None) -> None:
"""
Creates a data handling for single node simulations.
......@@ -48,17 +45,12 @@ class SerialDataHandling(DataHandling):
self.custom_data_cpu = DotDict()
self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {}
self._opencl_queue = opencl_queue
self._opencl_ctx = opencl_ctx
if not array_handler:
try:
self.array_handler = PyCudaArrayHandler()
except Exception:
self.array_handler = PyCudaNotAvailableHandler()
if default_target == Target.OPENCL or opencl_queue:
self.array_handler = PyOpenClArrayHandler(opencl_queue)
else:
self.array_handler = array_handler
......@@ -280,8 +272,6 @@ class SerialDataHandling(DataHandling):
def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None:
target = self.default_target
if target == Target.OPENCL: # TODO potential misuse between Target and Backend
target = Target.GPU
assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
......@@ -324,16 +314,13 @@ class SerialDataHandling(DataHandling):
else:
if functor is None:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as functor
target = Target.GPU if not isinstance(self.array_handler,
PyOpenClArrayHandler) else Target.OPENCL
target = Target.GPU
result.append(functor(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls,
target=target,
opencl_queue=self._opencl_queue,
opencl_ctx=self._opencl_ctx))
target=target))
if target == Target.CPU:
def result_functor():
......
......@@ -46,7 +46,7 @@ def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None)
if isinstance(ast, KernelWrapper):
ast = ast.ast
if ast.backend not in {Backend.C, Backend.CUDA, Backend.OPENCL}:
if ast.backend not in {Backend.C, Backend.CUDA}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend
......
......@@ -13,10 +13,6 @@ class Target(Enum):
"""
Target GPU architecture.
"""
OPENCL = auto()
"""
Target all architectures OpenCL covers (Thus both, Target and Backend)
"""
class Backend(Enum):
......@@ -32,7 +28,3 @@ class Backend(Enum):
"""
Use the CUDA backend to generate code for NVIDIA GPUs.
"""
OPENCL = auto()
"""
Use the OpenCL backend to generate code for OpenCL.
"""
......@@ -2,7 +2,6 @@ import numpy as np
from itertools import product
import pystencils.gpucuda
import pystencils.opencl
from pystencils import Assignment, Field
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils.enums import Target
......@@ -32,19 +31,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
thickness=None, dtype=float, target=Target.GPU, opencl_queue=None, opencl_ctx=None):
assert target in {Target.GPU, Target.OPENCL}
thickness=None, dtype=float, target=Target.GPU):
assert target in {Target.GPU}
src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
kernels = []
for src_slice, dst_slice in src_dst_slice_tuples:
ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)
if target == pystencils.Target.GPU:
kernels.append(pystencils.gpucuda.make_python_function(ast))
else:
ast._target = pystencils.Target.OPENCL
ast._backend = pystencils.Backend.OPENCL
kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx))
kernels.append(pystencils.gpucuda.make_python_function(ast))
def functor(pdfs, **_):
for kernel in kernels:
......
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from typing import Union
import numpy as np
try:
import pycuda.driver as cuda
from pycuda import gpuarray
import pycuda
except Exception:
pass
def ndarray_to_tex(tex_ref,  # type: Union[cuda.TextureReference, cuda.SurfaceReference]
                   ndarray,
                   address_mode=None,
                   filter_mode=None,
                   use_normalized_coordinates=False,
                   read_as_integer=False):
    """Bind the contents of `ndarray` to `tex_ref` and configure the reference.

    Args:
        tex_ref: reference object that receives the array and the settings
        ndarray: a `numpy.ndarray` or a `pycuda.gpuarray.GPUArray`
        address_mode: a member of `cuda.address_mode`, or its name as a string
                      (case-insensitive); `None` selects ``BORDER``
        filter_mode: a member of `cuda.filter_mode`; `None` selects ``LINEAR``
        use_normalized_coordinates: if false, the ``TRSF_NORMALIZED_COORDINATES`` flag is cleared
        read_as_integer: if false, the ``TRSF_READ_AS_INTEGER`` flag is cleared

    Raises:
        TypeError: if `ndarray` is neither a numpy array nor a pycuda GPUArray
    """
    # Resolve the settings: a string names the address mode, None selects the default.
    if isinstance(address_mode, str):
        address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
    if address_mode is None:
        address_mode = cuda.address_mode.BORDER
    if filter_mode is None:
        filter_mode = cuda.filter_mode.LINEAR

    # Choose the conversion routine that matches the array's origin.
    if isinstance(ndarray, np.ndarray):
        convert = cuda.np_to_array
    elif isinstance(ndarray, gpuarray.GPUArray):
        convert = cuda.gpuarray_to_array
    else:
        raise TypeError(
            'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray')
    cu_array = convert(ndarray, 'C')

    tex_ref.set_array(cu_array)

    # Dimension 0 is always configured; dimensions 1 and 2 only if the array has them.
    tex_ref.set_address_mode(0, address_mode)
    for dim in (1, 2):
        if ndarray.ndim > dim:
            tex_ref.set_address_mode(dim, address_mode)

    tex_ref.set_filter_mode(filter_mode)

    # Clear each flag that was not requested, re-reading the current flags every time.
    if not use_normalized_coordinates:
        current_flags = tex_ref.get_flags()
        tex_ref.set_flags(current_flags & ~cuda.TRSF_NORMALIZED_COORDINATES)

    if not read_as_integer:
        current_flags = tex_ref.get_flags()
        tex_ref.set_flags(current_flags & ~cuda.TRSF_READ_AS_INTEGER)
import functools
import itertools
import warnings
from dataclasses import dataclass, field
......@@ -105,14 +104,6 @@ class CreateKernelConfig:
"""
If set to `True`, auto can be used in the generated code for data types. This makes the type system more robust.
"""
opencl_queue: Any = None
"""
OpenCL queue if OpenCL target is used.
"""
opencl_ctx: Any = None
"""
OpenCL context if OpenCL target is used.
"""
index_fields: List[Field] = None
"""
List of index fields, i.e. 1D fields with struct data type. If not `None`, `create_index_kernel`
......@@ -136,8 +127,6 @@ class CreateKernelConfig:
self.backend = Backend.C
elif self.target == Target.GPU:
self.backend = Backend.CUDA
elif self.target == Target.OPENCL:
self.backend = Backend.OPENCL
else:
raise NotImplementedError(f'Target {self.target} has no default backend')
......@@ -274,20 +263,14 @@ def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelC
raise ValueError("Blocking cannot be combined with cacheline-zeroing")
else:
raise ValueError("Invalid value for cpu_vectorize_info")
elif config.target == Target.GPU or config.target == Target.OPENCL:
if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
elif config.target == Target.GPU:
if config.backend == Backend.CUDA:
from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
indexing_creator=indexing_creator_from_params(config.gpu_indexing,
config.gpu_indexing_params),
iteration_slice=config.iteration_slice, ghost_layers=config.ghost_layers,
skip_independence_check=config.skip_independence_check)
if config.backend == Backend.OPENCL:
from pystencils.opencl.opencljit import make_python_function
ast._backend = config.backend
ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
ast._target = config.target
ast._backend = config.backend
if not ast:
raise NotImplementedError(
......@@ -349,8 +332,8 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
coordinate_names=config.coordinate_names)
if config.cpu_openmp:
add_openmp(ast, num_threads=config.cpu_openmp)
elif config.target == Target.GPU or config.target == Target.OPENCL:
if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
elif config.target == Target.GPU:
if config.backend == Backend.CUDA:
from pystencils.gpucuda import created_indexed_cuda_kernel
idx_creator = indexing_creator_from_params(config.gpu_indexing, config.gpu_indexing_params)
ast = created_indexed_cuda_kernel(assignments,
......@@ -358,12 +341,6 @@ def create_indexed_kernel(assignments: List[Assignment], *, config: CreateKernel
type_info=config.data_type,
coordinate_names=config.coordinate_names,
indexing_creator=idx_creator)
if config.backend == Backend.OPENCL:
from pystencils.opencl.opencljit import make_python_function
ast._backend = config.backend
ast.compile = functools.partial(make_python_function, ast, config.opencl_queue, config.opencl_ctx)
ast._target = config.target
ast._backend = config.backend
if not ast:
raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')
......
"""
"""
from pystencils.opencl.opencljit import (
clear_global_ctx, init_globally, init_globally_with_context, make_python_function)
__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function']