pystencils · Commit 64734915

Authored Aug 22, 2019 by Martin Bauer

Merge branch 'opencl-backend' into 'master'

Basic support for OpenCL (experimental). See merge request pycodegen/pystencils!29

Parents: e7a8d3ce, ba5d035e
Changes: 10 files
README.md
```diff
@@ -52,6 +52,7 @@ Without `[interactive]` you get a minimal version with very few dependencies.
 All options:
 - `gpu`: use this if an Nvidia GPU is available and CUDA is installed
+- `pyopencl`: basic OpenCL support (experimental)
 - `alltrafos`: pulls in additional dependencies for loop simplification, e.g. libisl
 - `bench_db`: functionality to store benchmark results in object databases
 - `interactive`: installs dependencies to work in Jupyter, including image I/O, plotting etc.
```
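The new `pyopencl` extra is selected like the existing ones at install time, e.g. `pip install pystencils[pyopencl]` (standard setuptools extras syntax; the extra name is taken from the list above).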
pystencils/astnodes.py
```diff
@@ -707,8 +707,7 @@ class DestructuringBindingsForFieldClass(Node):
         corresponding_field_names = {s.field_name for s in undefined_field_symbols if hasattr(s, 'field_name')}
         corresponding_field_names |= {s.field_names[0] for s in undefined_field_symbols if hasattr(s, 'field_names')}
         return {TypedSymbol(f, self.CLASS_NAME_TEMPLATE.format(dtype=field_map[f].dtype, ndim=field_map[f].ndim) + '&')
-                for f in corresponding_field_names} | \
-            (self.body.undefined_symbols - undefined_field_symbols)
+                for f in corresponding_field_names} | (self.body.undefined_symbols - undefined_field_symbols)

     def subs(self, subs_dict) -> None:
         """Inplace! substitute, similar to sympy's but modifies the AST inplace."""
```
pystencils/backends/cbackend.py
```diff
@@ -59,6 +59,9 @@ def generate_c(ast_node: Node, signature_only: bool = False, dialect='c', custom_backend=None):
     elif dialect == 'cuda':
         from pystencils.backends.cuda_backend import CudaBackend
         printer = CudaBackend(signature_only=signature_only)
+    elif dialect == 'opencl':
+        from pystencils.backends.opencl_backend import OpenClBackend
+        printer = OpenClBackend(signature_only=signature_only)
     else:
         raise ValueError("Unknown dialect: " + str(dialect))
     code = printer(ast_node)
```
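Since this dispatch is the only entry-point change, the same GPU AST can now be printed in either dialect. A minimal sketch, mirroring the new tests at the end of this diff:

```python
import sympy as sp

import pystencils
from pystencils.backends.cbackend import generate_c

# build a GPU kernel AST exactly as the tests below do
z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])})
ast = pystencils.create_kernel(assignments, target='gpu')

cuda_code = generate_c(ast, dialect='cuda')      # existing path
opencl_code = generate_c(ast, dialect='opencl')  # path added by this merge
```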
```diff
@@ -166,14 +169,19 @@ class CBackend:
         return result

     def _print(self, node):
         if isinstance(node, str):
             return node
         for cls in type(node).__mro__:
             method_name = "_print_" + cls.__name__
             if hasattr(self, method_name):
                 return getattr(self, method_name)(node)
         raise NotImplementedError(self.__class__.__name__ + " does not support node of type "
                                   + node.__class__.__name__)

     def _print_Type(self, node):
         return str(node)

     def _print_KernelFunction(self, node):
-        function_arguments = ["%s %s" % (str(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
+        function_arguments = ["%s %s" % (self._print(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
         launch_bounds = ""
         if self._dialect == 'cuda':
             max_threads = node.indexing.max_threads_per_block()
```
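The `__mro__` walk in `_print` is what lets node subclasses introduced elsewhere in this merge (e.g. `ThreadIndexingSymbol`, a `TypedSymbol` subclass, further down) pick up a specialized printer method, while backends without one fall back to the base-class handler. A minimal self-contained sketch of the same dispatch pattern (toy classes, not pystencils API):

```python
class Base:
    pass

class Special(Base):
    pass

class Printer:
    def _print(self, node):
        # walk the node's class hierarchy until a matching _print_<ClassName> exists
        for cls in type(node).__mro__:
            method = getattr(self, "_print_" + cls.__name__, None)
            if method is not None:
                return method(node)
        raise NotImplementedError(type(node).__name__)

    def _print_Base(self, node):
        return "handled as Base"

print(Printer()._print(Special()))  # no _print_Special -> falls back to _print_Base
```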
```diff
@@ -208,7 +216,11 @@ class CBackend:
     def _print_SympyAssignment(self, node):
         if node.is_declaration:
-            data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " "
+            if node.is_const:
+                prefix = 'const '
+            else:
+                prefix = ''
+            data_type = prefix + self._print(node.lhs.dtype) + " "
             return "%s%s = %s;" % (data_type,
                                    self.sympy_printer.doprint(node.lhs),
                                    self.sympy_printer.doprint(node.rhs))
         else:
```
```diff
@@ -277,10 +289,10 @@ class CBackend:
         ]
         destructuring_bindings.sort()  # only for code aesthetics
         return "{\n" + self._indent + \
                ("\n" + self._indent).join(destructuring_bindings) + \
                "\n" + self._indent + \
                ("\n" + self._indent).join(self._print(node.body).splitlines()) + \
                "\n}"

 # ------------------------------------------ Helper function & classes -------------------------------------------------
```
pystencils/backends/opencl1.1_known_functions.txt (new file, mode 100644)

```text
acos
acosh
acospi
asin
asinh
asinpi
atan
atan2
atanh
atanpi
atan2pi
cbrt
ceil
copysign
cos
cosh
cospi
erfc
erf
exp
exp2
exp10
expm1
fabs
fdim
floor
fma
fmax
fmin
fmod
fract
frexp
hypot
ilogb
ldexp
lgamma
lgamma_r
log
log2
log10
log1p
logb
mad
maxmag
minmag
modf
nextafter
pow
pown
powr
remquo
rint
rootn
round
rsqrt
sin
sincos
sinh
sinpi
sqrt
tan
tanh
tanpi
tgamma
trunc
half_cos
half_divide
half_exp
half_exp2
half_exp10
half_log
half_log2
half_log10
half_powr
half_recip
half_rsqrt
half_sin
half_sqrt
half_tan
native_cos
native_divide
native_exp
native_exp2
native_exp10
native_log
native_log2
native_log10
native_powr
native_recip
native_rsqrt
native_sin
native_sqrt
native_tan
```
pystencils/backends/opencl_backend.py (new file, mode 100644)
```python
from os.path import dirname, join

import pystencils.data_types
from pystencils.astnodes import Node
from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt

with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
    lines = f.readlines()
    OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l}


def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
    """Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.

    Args:
        astnode: KernelFunction node to generate code for
        signature_only: if True only the signature is printed

    Returns:
        C-like code for the ast node and its descendants
    """
    return generate_c(astnode, signature_only, dialect='opencl')


class OpenClBackend(CudaBackend):

    def __init__(self, sympy_printer=None, signature_only=False):
        if not sympy_printer:
            sympy_printer = OpenClSympyPrinter()
        super().__init__(sympy_printer, signature_only)
        self._dialect = 'opencl'

    def _print_Type(self, node):
        code = super()._print_Type(node)
        if isinstance(node, pystencils.data_types.PointerType):
            return "__global " + code
        else:
            return code

    def _print_ThreadBlockSynchronization(self, node):
        raise NotImplementedError()

    def _print_TextureDeclaration(self, node):
        raise NotImplementedError()


class OpenClSympyPrinter(CudaSympyPrinter):
    language = "OpenCL"

    DIMENSION_MAPPING = {'x': '0', 'y': '1', 'z': '2'}
    INDEXING_FUNCTION_MAPPING = {
        'blockIdx': 'get_group_id',
        'threadIdx': 'get_local_id',
        'blockDim': 'get_local_size',
        'gridDim': 'get_global_size'
    }

    def __init__(self):
        CustomSympyPrinter.__init__(self)
        self.known_functions = OPENCL_KNOWN_FUNCTIONS

    def _print_ThreadIndexingSymbol(self, node):
        symbol_name: str = node.name
        function_name, dimension = tuple(symbol_name.split("."))
        dimension = self.DIMENSION_MAPPING[dimension]
        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
        return f"{function_name}({dimension})"

    def _print_TextureAccess(self, node):
        raise NotImplementedError()

    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter,
    # since built-in math functions are generic.
    # In CUDA, you have to differentiate between `sin` and `sinf`.
    _print_math_func = CustomSympyPrinter._print_math_func
    _print_Pow = CustomSympyPrinter._print_Pow

    def _print_Function(self, expr):
        if isinstance(expr, fast_division):
            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_sqrt):
            return "native_sqrt(%s)" % tuple(self._print(a) for a in expr.args)
        elif isinstance(expr, fast_inv_sqrt):
            return "native_rsqrt(%s)" % tuple(self._print(a) for a in expr.args)
        return CustomSympyPrinter._print_Function(self, expr)
```
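For orientation, `generate_opencl` is just `generate_c` with the new dialect; the address-space qualifiers and indexing calls asserted in the tests at the end of this diff come from `_print_Type` and `_print_ThreadIndexingSymbol`. A minimal sketch:

```python
import sympy as sp

import pystencils
from pystencils.backends.opencl_backend import generate_opencl

z, y, x = pystencils.fields("z, y, x: [2d]")
assignments = pystencils.AssignmentCollection({z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])})
ast = pystencils.create_kernel(assignments, target='gpu')

code = generate_opencl(ast)
assert "__global" in code          # pointers gain the OpenCL address-space qualifier
assert "get_local_id(0)" in code   # threadIdx.x translated via INDEXING_FUNCTION_MAPPING
```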
pystencils/gpucuda/indexing.py
```python
import abc
from functools import partial
from typing import Tuple  # noqa

import sympy as sp
from sympy.core.cache import cacheit

from pystencils.astnodes import Block, Conditional
from pystencils.data_types import TypedSymbol, create_type
```
```diff
@@ -10,10 +10,24 @@ from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice
 from pystencils.sympyextensions import is_integer_sequence, prod

-BLOCK_IDX = [TypedSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-THREAD_IDX = [TypedSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-BLOCK_DIM = [TypedSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-GRID_DIM = [TypedSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+
+class ThreadIndexingSymbol(TypedSymbol):
+    def __new__(cls, *args, **kwds):
+        obj = ThreadIndexingSymbol.__xnew_cached_(cls, *args, **kwds)
+        return obj
+
+    def __new_stage2__(cls, name, dtype, *args, **kwargs):
+        obj = super(ThreadIndexingSymbol, cls).__xnew__(cls, name, dtype, *args, **kwargs)
+        return obj
+
+    __xnew__ = staticmethod(__new_stage2__)
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+
+BLOCK_IDX = [ThreadIndexingSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+THREAD_IDX = [ThreadIndexingSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+BLOCK_DIM = [ThreadIndexingSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+GRID_DIM = [ThreadIndexingSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]


 class AbstractIndexing(abc.ABC):
```
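The point of the new subclass: printers dispatch on the node's class (see the `__mro__` walk in the cbackend hunk above), so wrapping the CUDA indexing variables in `ThreadIndexingSymbol` lets `OpenClSympyPrinter._print_ThreadIndexingSymbol` intercept them, while everything else still treats them as ordinary `TypedSymbol`s. A minimal sketch of the translation this enables:

```python
from pystencils.backends.opencl_backend import OpenClSympyPrinter
from pystencils.gpucuda.indexing import BLOCK_IDX, THREAD_IDX

printer = OpenClSympyPrinter()
print(printer.doprint(BLOCK_IDX[0]))   # blockIdx.x  -> get_group_id(0)
print(printer.doprint(THREAD_IDX[2]))  # threadIdx.z -> get_local_id(2)
```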
```diff
@@ -69,6 +83,7 @@ class AbstractIndexing(abc.ABC):
     def symbolic_parameters(self):
         """Set of symbols required in call_parameters code"""

 # -------------------------------------------- Implementations ---------------------------------------------------------
```
```diff
@@ -82,6 +97,7 @@ class BlockIndexing(AbstractIndexing):
         gets the largest amount of threads
         compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used
     """

     def __init__(self, field, iteration_slice,
                  block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
```
pystencils/include/opencl_stdint.h (new file, mode 100644)
```c
#ifndef OPENCL_STDINT
#define OPENCL_STDINT

typedef unsigned int uint;
typedef unsigned int uint_t;

typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long int int64_t;

typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long int uint64_t;

#endif
```
pystencils/opencl/opencljit.py (new file, mode 100644)
```python
import numpy as np

from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
from pystencils.include import get_pystencils_include_path

USE_FAST_MATH = True


def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates an **OpenCL** kernel function from an abstract syntax tree which
    was created for ``target='gpu'``, e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`.

    Args:
        opencl_queue: a valid :class:`pyopencl.CommandQueue`
        opencl_ctx: a valid :class:`pyopencl.Context`
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.

    Returns:
        compiled kernel as Python function
    """
    import pyopencl as cl

    assert opencl_ctx, "No valid OpenCL context"
    assert opencl_queue, "No valid OpenCL queue"

    if argument_dict is None:
        argument_dict = {}

    # Changing of kernel name necessary since compilation with default name "kernel" is not possible (OpenCL keyword!)
    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))

    options = []
    if USE_FAST_MATH:
        options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
    options.append("-I \"" + get_pystencils_include_path() + "\"")
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
                                                                                  block_and_thread_numbers['grid']))

            args = _build_numpy_argument_list(parameters, full_arguments)
            args = [a.data if hasattr(a, 'data') else a for a in args]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    return wrapper
```
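End-to-end usage, condensed from the tests below (assumes a working pyopencl installation; `create_some_context(0)` picks the first platform non-interactively):

```python
import numpy as np
import pyopencl as cl
import pyopencl.array as array
import sympy as sp

import pystencils
from pystencils.opencl.opencljit import make_python_function

z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])})
ast = pystencils.create_kernel(assignments, target='gpu')

ctx = cl.create_some_context(0)
queue = cl.CommandQueue(ctx)

x_gpu = array.to_device(queue, np.random.rand(20, 30))
y_gpu = array.to_device(queue, np.random.rand(20, 30))
z_gpu = array.to_device(queue, np.random.rand(20, 30))

kernel = make_python_function(ast, queue, ctx)
kernel(x=x_gpu, y=y_gpu, z=z_gpu)
result = z_gpu.get(queue)
```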
pystencils_tests/test_opencl.py (new file, mode 100644)
```python
import numpy as np
import pytest
import sympy as sp

import pystencils
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import make_python_function

try:
    import pyopencl as cl
    HAS_OPENCL = True
except Exception:
    HAS_OPENCL = False


def test_print_opencl():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    assert "__global double * RESTRICT const _data_x" in str(opencl_code)
    assert "__global double * RESTRICT" in str(opencl_code)
    assert "get_local_id(0)" in str(opencl_code)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
    z, y, x = pystencils.fields("z, y, x: [20,30]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)
    cuda_kernel(x=x, y=y, z=z)

    result_cuda = z.get()

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x = array.to_device(queue, x_cpu)
    y = array.to_device(queue, y_cpu)
    z = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z)

    result_opencl = z.get(queue)

    assert np.allclose(result_cuda, result_opencl)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
    # same as test_opencl_jit_fixed_size, but with symbolic field shape
    z, y, x = pystencils.fields("z, y, x: [2d]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)
    cuda_kernel(x=x, y=y, z=z)

    result_cuda = z.get()

    import pyopencl.array as array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x = array.to_device(queue, x_cpu)
    y = array.to_device(queue, y_cpu)
    z = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z)

    result_opencl = z.get(queue)

    assert np.allclose(result_cuda, result_opencl)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    a = sp.Symbol('a')
    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
    })

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)

    # (the source is cut off here, mid-call: `cuda_kernel(x` ...; the remainder
    #  follows the same CUDA-vs-OpenCL comparison as the two tests above)
```