Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jonas Plewinski
pystencils
Commits
b93d6992
Commit
b93d6992
authored
Aug 13, 2019
by
Stephan Seitz
Browse files
Implement opencljit
parent
c07ccb36
Changes
4
Hide whitespace changes
Inline
Side-by-side
pystencils/backends/opencl_backend.py
View file @
b93d6992
...
...
@@ -18,7 +18,8 @@ def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
class
OpenClBackend
(
CudaBackend
):
def
__init__
(
self
,
sympy_printer
=
None
,
def
__init__
(
self
,
sympy_printer
=
None
,
signature_only
=
False
):
if
not
sympy_printer
:
sympy_printer
=
OpenClSympyPrinter
()
...
...
@@ -26,12 +27,6 @@ class OpenClBackend(CudaBackend):
super
().
__init__
(
sympy_printer
,
signature_only
)
self
.
_dialect
=
'opencl'
# def _print_SympyAssignment(self, node):
# code = super()._print_SympyAssignment(node)
# if node.is_declaration and isinstance(node.lhs.dtype, pystencils.data_types.PointerType):
# return "__global " + code
# else:
# return code
def
_print_Type
(
self
,
node
):
code
=
super
().
_print_Type
(
node
)
...
...
pystencils/include/opencl_stdint.h
0 → 100644
View file @
b93d6992
/* Minimal <stdint.h> replacement for OpenCL C, which does not ship the
 * standard header.  Per the OpenCL C specification, `long` is always a
 * 64-bit signed integer, so this typedef is portable across devices. */
typedef long int64_t;
pystencils/opencl/opencljit.py
0 → 100644
View file @
b93d6992
import
numpy
as
np
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.gpucuda.cudajit
import
_build_numpy_argument_list
,
_check_arguments
from
pystencils.include
import
get_pystencils_include_path
# When True, kernels are built with aggressive OpenCL math flags
# (-cl-unsafe-math-optimizations, -cl-mad-enable, -cl-fast-relaxed-math,
# -cl-finite-math-only) — faster, but not strictly IEEE-conformant.
USE_FAST_MATH = True
def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates a kernel function from an abstract syntax tree which
    was created e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.create_indexed_cuda_kernel`

    Args:
        kernel_function_node: the abstract syntax tree
        opencl_queue: pyopencl.CommandQueue the kernel launches are enqueued on
        opencl_ctx: pyopencl.Context used to build the program
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.
        custom_backend: optional backend instance forwarded to :func:`generate_c`

    Returns:
        compiled kernel as Python function
    """
    import pyopencl as cl

    if argument_dict is None:
        argument_dict = {}

    # Prefix the kernel name once; the guard prevents double-prefixing
    # ("opencl_opencl_...") when this is called twice on the same AST node.
    if not kernel_function_node.function_name.startswith("opencl_"):
        kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name

    # opencl_stdint.h supplies int64_t, which OpenCL C lacks.
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))

    options = []
    if USE_FAST_MATH:
        options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
    options.append("-I \"" + get_pystencils_include_path() + "\"")
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        # Use the key tuple itself instead of hash(tuple(...)): a raw hash
        # value can collide, which would silently replay the wrong cached
        # argument list; dict lookup on the tuple also compares for equality.
        key = tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                    for k, v in kwargs.items())
        try:
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            # pyopencl kernels take the *global* work size (grid * block),
            # unlike CUDA which takes the number of blocks.
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in
                                                     zip(block_and_thread_numbers['block'],
                                                         block_and_thread_numbers['grid']))

            args = _build_numpy_argument_list(parameters, full_arguments)
            # Unwrap device buffers; pyopencl arrays expose them as `.data`.
            args = [a.data for a in args if hasattr(a, 'data')]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    return wrapper
pystencils_tests/test_opencl.py
View file @
b93d6992
import
numpy
as
np
import
pyopencl
as
cl
import
sympy
as
sp
import
pystencils
from
pystencils.backends.cuda_backend
import
CudaBackend
from
pystencils.backends.opencl_backend
import
OpenClBackend
from
pystencils.opencl.opencljit
import
make_python_function
def
test_opencl
_backend
():
def
test_
print_
opencl
():
z
,
y
,
x
=
pystencils
.
fields
(
"z, y, x: [2d]"
)
assignments
=
pystencils
.
AssignmentCollection
({
...
...
@@ -24,6 +27,61 @@ def test_opencl_backend():
opencl_code
=
pystencils
.
show_code
(
ast
,
custom_backend
=
OpenClBackend
())
print
(
opencl_code
)
assert
"__global double * RESTRICT const _data_x"
in
str
(
opencl_code
)
assert
"__global double * RESTRICT"
in
str
(
opencl_code
)
assert
"get_local_id(0)"
in
str
(
opencl_code
)
def test_opencl_jit():
    """Run the same kernel through the CUDA and OpenCL JITs and compare results."""
    z, y, x = pystencils.fields("z, y, x: [20,30]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    # Emit generated code from both backends for manual inspection.
    for backend in (CudaBackend(), OpenClBackend()):
        print(pystencils.show_code(ast, custom_backend=backend))

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    # One shared set of host inputs so both devices compute on identical data.
    host_x, host_y, host_z = (np.random.rand(20, 30) for _ in range(3))

    x = gpuarray.to_gpu(host_x)
    y = gpuarray.to_gpu(host_y)
    z = gpuarray.to_gpu(host_z)
    cuda_kernel(x=x, y=y, z=z)
    result_cuda = z.get()

    import pyopencl.array as array

    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)
    x, y, z = (array.to_device(queue, host) for host in (host_x, host_y, host_z))

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z)
    result_opencl = z.get(queue)

    assert np.allclose(result_cuda, result_opencl)
if __name__ == '__main__':
    # Allow running the OpenCL tests directly, without pytest.
    for test_case in (test_opencl_backend, test_opencl_jit):
        test_case()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment