Commit c07ccb36 authored by Stephan Seitz
Browse files

Finish implementation OpenClBackend,OpenClSympyPrinter

parent 0b27d3c3
...@@ -59,8 +59,8 @@ def generate_c(ast_node: Node, signature_only: bool = False, dialect='c', custom ...@@ -59,8 +59,8 @@ def generate_c(ast_node: Node, signature_only: bool = False, dialect='c', custom
from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.cuda_backend import CudaBackend
printer = CudaBackend(signature_only=signature_only) printer = CudaBackend(signature_only=signature_only)
elif dialect == 'opencl': elif dialect == 'opencl':
from pystencils.backends.opencl_backend import OpenCLBackend from pystencils.backends.opencl_backend import OpenClBackend
printer = OpenCLBackend(signature_only=signature_only) printer = OpenClBackend(signature_only=signature_only)
else: else:
raise ValueError("Unknown dialect: " + str(dialect)) raise ValueError("Unknown dialect: " + str(dialect))
code = printer(ast_node) code = printer(ast_node)
...@@ -174,8 +174,11 @@ class CBackend: ...@@ -174,8 +174,11 @@ class CBackend:
return getattr(self, method_name)(node) return getattr(self, method_name)(node)
raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__) raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__)
def _print_Type(self, node):
return str(node)
def _print_KernelFunction(self, node): def _print_KernelFunction(self, node):
function_arguments = ["%s %s" % (str(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()] function_arguments = ["%s %s" % (self._print(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
launch_bounds = "" launch_bounds = ""
if self._dialect == 'cuda': if self._dialect == 'cuda':
max_threads = node.indexing.max_threads_per_block() max_threads = node.indexing.max_threads_per_block()
...@@ -210,7 +213,7 @@ class CBackend: ...@@ -210,7 +213,7 @@ class CBackend:
def _print_SympyAssignment(self, node): def _print_SympyAssignment(self, node):
if node.is_declaration: if node.is_declaration:
data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " " data_type = "const " + self._print(node.lhs.dtype) + " " if node.is_const else self._print(node.lhs.dtype) + " "
return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs), return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs),
self.sympy_printer.doprint(node.rhs)) self.sympy_printer.doprint(node.rhs))
else: else:
......
from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.backends.cbackend import generate_c from pystencils.backends.cbackend import generate_c
from pystencils.astnodes import Node from pystencils.astnodes import Node
import pystencils.data_types
def generate_opencl(astnode: Node, signature_only: bool = False) -> str: def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
"""Prints an abstract syntax tree node as OpenCL code. """Prints an abstract syntax tree node as OpenCL code.
...@@ -15,5 +16,50 @@ def generate_opencl(astnode: Node, signature_only: bool = False) -> str: ...@@ -15,5 +16,50 @@ def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
return generate_c(astnode, signature_only, dialect='opencl') return generate_c(astnode, signature_only, dialect='opencl')
class OpenCLBackend(CudaBackend): class OpenClBackend(CudaBackend):
pass
\ No newline at end of file def __init__(self, sympy_printer=None,
signature_only=False):
if not sympy_printer:
sympy_printer = OpenClSympyPrinter()
super().__init__(sympy_printer, signature_only)
self._dialect = 'opencl'
# def _print_SympyAssignment(self, node):
# code = super()._print_SympyAssignment(node)
# if node.is_declaration and isinstance(node.lhs.dtype, pystencils.data_types.PointerType):
# return "__global " + code
# else:
# return code
def _print_Type(self, node):
    """Render a data type, prefixing pointer types with ``__global``.

    The parent implementation produces the base text; only instances of
    ``pystencils.data_types.PointerType`` receive the ``__global `` prefix,
    every other type is returned unchanged.
    """
    printed = super()._print_Type(node)
    if not isinstance(node, pystencils.data_types.PointerType):
        return printed
    return "__global " + printed
class OpenClSympyPrinter(CudaSympyPrinter):
    """Sympy printer that renders CUDA-style thread indexing symbols as OpenCL calls.

    A symbol named ``<builtin>.<component>`` (for example ``blockIdx.x``) is
    printed as ``<opencl_function>(<dimension_index>)`` using the two lookup
    tables below.
    """
    language = "OpenCL"

    # CUDA component suffix -> dimension index passed to the OpenCL function.
    DIMENSION_MAPPING = {
        'x': '0',
        'y': '1',
        'z': '2'
    }
    # CUDA built-in variable -> OpenCL work-item function.
    INDEXING_FUNCTION_MAPPING = {
        'blockIdx': 'get_group_id',
        'threadIdx': 'get_local_id',
        'blockDim': 'get_local_size',
        # gridDim is the number of blocks per grid, i.e. the number of
        # work-groups. get_global_size would instead return the total number
        # of work-items (gridDim * blockDim).
        'gridDim': 'get_num_groups'
    }

    def _print_ThreadIndexingSymbol(self, node):
        """Print a thread indexing symbol as an OpenCL work-item function call.

        Args:
            node: symbol whose ``name`` has the form ``"<builtin>.<component>"``.

        Returns:
            The call text, e.g. ``"get_group_id(0)"`` for ``blockIdx.x``.

        Raises:
            ValueError: if the name does not split into exactly two parts at ``"."``.
            KeyError: if the builtin or the component is not in the mapping tables.
        """
        symbol_name: str = node.name
        function_name, dimension = symbol_name.split(".")
        dimension = self.DIMENSION_MAPPING[dimension]
        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
        return f"{function_name}({dimension})"
import abc import abc
from functools import partial from functools import partial
from typing import Tuple # noqa
import sympy as sp import sympy as sp
from sympy.core.cache import cacheit
from pystencils.astnodes import Block, Conditional from pystencils.astnodes import Block, Conditional
from pystencils.data_types import TypedSymbol, create_type from pystencils.data_types import TypedSymbol, create_type
...@@ -10,10 +10,24 @@ from pystencils.integer_functions import div_ceil, div_floor ...@@ -10,10 +10,24 @@ from pystencils.integer_functions import div_ceil, div_floor
from pystencils.slicing import normalize_slice from pystencils.slicing import normalize_slice
from pystencils.sympyextensions import is_integer_sequence, prod from pystencils.sympyextensions import is_integer_sequence, prod
BLOCK_IDX = [TypedSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
THREAD_IDX = [TypedSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')] class ThreadIndexingSymbol(TypedSymbol):
BLOCK_DIM = [TypedSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')] def __new__(cls, *args, **kwds):
GRID_DIM = [TypedSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')] obj = ThreadIndexingSymbol.__xnew_cached_(cls, *args, **kwds)
return obj
def __new_stage2__(cls, name, dtype, *args, **kwargs):
    """Build the symbol by delegating to the parent class's ``__xnew__``.

    All arguments are forwarded unchanged; the created object is returned.
    """
    return super(ThreadIndexingSymbol, cls).__xnew__(cls, name, dtype, *args, **kwargs)
__xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
BLOCK_IDX = [ThreadIndexingSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
THREAD_IDX = [ThreadIndexingSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
BLOCK_DIM = [ThreadIndexingSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
GRID_DIM = [ThreadIndexingSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
class AbstractIndexing(abc.ABC): class AbstractIndexing(abc.ABC):
...@@ -69,6 +83,7 @@ class AbstractIndexing(abc.ABC): ...@@ -69,6 +83,7 @@ class AbstractIndexing(abc.ABC):
def symbolic_parameters(self): def symbolic_parameters(self):
"""Set of symbols required in call_parameters code""" """Set of symbols required in call_parameters code"""
# -------------------------------------------- Implementations --------------------------------------------------------- # -------------------------------------------- Implementations ---------------------------------------------------------
...@@ -82,6 +97,7 @@ class BlockIndexing(AbstractIndexing): ...@@ -82,6 +97,7 @@ class BlockIndexing(AbstractIndexing):
gets the largest amount of threads gets the largest amount of threads
compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used
""" """
def __init__(self, field, iteration_slice, def __init__(self, field, iteration_slice,
block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False, block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
maximum_block_size=(1024, 1024, 64)): maximum_block_size=(1024, 1024, 64)):
......
...@@ -2,7 +2,7 @@ import sympy as sp ...@@ -2,7 +2,7 @@ import sympy as sp
import pystencils import pystencils
from pystencils.backends.cuda_backend import CudaBackend from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenCLBackend from pystencils.backends.opencl_backend import OpenClBackend
def test_opencl_backend(): def test_opencl_backend():
...@@ -21,7 +21,7 @@ def test_opencl_backend(): ...@@ -21,7 +21,7 @@ def test_opencl_backend():
code = pystencils.show_code(ast, custom_backend=CudaBackend()) code = pystencils.show_code(ast, custom_backend=CudaBackend())
print(code) print(code)
opencl_code = pystencils.show_code(ast, custom_backend=OpenCLBackend()) opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
print(opencl_code) print(opencl_code)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment