Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sebastian Bindgen
pystencils
Commits
d1f332bd
Commit
d1f332bd
authored
Oct 13, 2020
by
Stephan Seitz
Browse files
Merge branch 'Extend_testsuit' into 'master'
Extend testsuit See merge request
!174
parents
9057081d
6654fa17
Changes
23
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
d1f332bd
...
...
@@ -93,6 +93,7 @@ minimal-conda:
-
python setup.py quicktest
tags
:
-
docker
-
cuda
minimal-sympy-master
:
...
...
@@ -107,6 +108,7 @@ minimal-sympy-master:
allow_failure
:
true
tags
:
-
docker
-
cuda
pycodegen-integration
:
...
...
pystencils/backends/cbackend.py
View file @
d1f332bd
...
...
@@ -45,9 +45,11 @@ def generate_c(ast_node: Node,
create_kernel functions.
Args:
ast_node:
signature_only:
dialect: 'c' or 'cuda'
ast_node: ast representation of kernel
signature_only: generate signature without function body
dialect: 'c', 'cuda' or opencl
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C-like code for the ast node and its descendants
"""
...
...
pystencils/backends/cuda_backend.py
View file @
d1f332bd
...
...
@@ -10,17 +10,20 @@ with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
CUDA_KNOWN_FUNCTIONS
=
{
l
.
strip
():
l
.
strip
()
for
l
in
lines
if
l
}
def
generate_cuda
(
astnode
:
Node
,
signature_only
:
bool
=
False
)
->
str
:
def
generate_cuda
(
ast
_
node
:
Node
,
signature_only
:
bool
=
False
,
custom_backend
=
None
,
with_globals
=
True
)
->
str
:
"""Prints an abstract syntax tree node as CUDA code.
Args:
astnode: KernelFunction node to generate code for
signature_only: if True only the signature is printed
ast_node: ast representation of kernel
signature_only: generate signature without function body
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C
-like
code for the ast node and its descendants
C
UDA
code for the ast node and its descendants
"""
return
generate_c
(
astnode
,
signature_only
,
dialect
=
'cuda'
)
return
generate_c
(
ast_node
,
signature_only
,
dialect
=
'cuda'
,
custom_backend
=
custom_backend
,
with_globals
=
with_globals
)
class
CudaBackend
(
CBackend
):
...
...
pystencils/backends/json.py
View file @
d1f332bd
...
...
@@ -12,22 +12,10 @@ import json
from
pystencils.astnodes
import
NodeOrExpr
from
pystencils.backends.cbackend
import
CustomSympyPrinter
,
generate_c
try
:
import
toml
except
Exception
:
class
toml
:
def
dumps
(
self
,
*
args
):
raise
ImportError
(
'toml not installed'
)
def
dump
(
self
,
*
args
):
raise
ImportError
(
'toml not installed'
)
try
:
import
yaml
except
Exception
:
class
yaml
:
def
dumps
(
self
,
*
args
):
raise
ImportError
(
'pyyaml not installed'
)
except
ImportError
:
raise
ImportError
(
'yaml not installed'
)
def
expr_to_dict
(
expr_or_node
:
NodeOrExpr
,
with_c_code
=
True
,
full_class_names
=
False
):
...
...
@@ -67,8 +55,8 @@ def print_json(expr_or_node: NodeOrExpr):
Returns:
str: JSON representation of AST
"""
dict
=
expr_to_dict
(
expr_or_node
)
return
json
.
dumps
(
dict
,
indent
=
4
)
expr_or_node_
dict
=
expr_to_dict
(
expr_or_node
)
return
json
.
dumps
(
expr_or_node_
dict
,
indent
=
4
)
def
write_json
(
filename
:
str
,
expr_or_node
:
NodeOrExpr
):
...
...
@@ -78,28 +66,17 @@ def write_json(filename: str, expr_or_node: NodeOrExpr):
filename (str): Path for the file to write
expr_or_node (NodeOrExpr): a SymPy expression or a :class:`pystencils.astnodes.Node`
"""
dict
=
expr_to_dict
(
expr_or_node
)
with
open
(
filename
,
'w'
)
as
f
:
json
.
dump
(
dict
,
f
,
indent
=
4
)
def
print_toml
(
expr_or_node
):
dict
=
expr_to_dict
(
expr_or_node
,
full_class_names
=
False
)
return
toml
.
dumps
(
dict
)
def
write_toml
(
filename
,
expr_or_node
):
dict
=
expr_to_dict
(
expr_or_node
)
expr_or_node_dict
=
expr_to_dict
(
expr_or_node
)
with
open
(
filename
,
'w'
)
as
f
:
toml
.
dump
(
dict
,
f
)
json
.
dump
(
expr_or_node_dict
,
f
,
indent
=
4
)
def
print_yaml
(
expr_or_node
):
dict
=
expr_to_dict
(
expr_or_node
,
full_class_names
=
False
)
return
yaml
.
dump
(
dict
)
expr_or_node_
dict
=
expr_to_dict
(
expr_or_node
,
full_class_names
=
False
)
return
yaml
.
dump
(
expr_or_node_
dict
)
def
write_yaml
(
filename
,
expr_or_node
):
dict
=
expr_to_dict
(
expr_or_node
)
expr_or_node_
dict
=
expr_to_dict
(
expr_or_node
)
with
open
(
filename
,
'w'
)
as
f
:
yaml
.
dump
(
dict
,
f
)
yaml
.
dump
(
expr_or_node_
dict
,
f
)
pystencils/backends/opencl_backend.py
View file @
d1f332bd
...
...
@@ -11,17 +11,20 @@ with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
OPENCL_KNOWN_FUNCTIONS
=
{
l
.
strip
():
l
.
strip
()
for
l
in
lines
if
l
}
def
generate_opencl
(
astnode
:
Node
,
signature_only
:
bool
=
False
)
->
str
:
def
generate_opencl
(
ast
_
node
:
Node
,
signature_only
:
bool
=
False
,
custom_backend
=
None
,
with_globals
=
True
)
->
str
:
"""Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.
Args:
astnode: KernelFunction node to generate code for
signature_only: if True only the signature is printed
ast_node: ast representation of kernel
signature_only: generate signature without function body
custom_backend: use own custom printer for code generation
with_globals: enable usage of global variables
Returns:
C-like
code for the ast node and its descendants
OpenCL
code for the ast node and its descendants
"""
return
generate_c
(
astnode
,
signature_only
,
dialect
=
'opencl'
)
return
generate_c
(
ast_node
,
signature_only
,
dialect
=
'opencl'
,
custom_backend
=
custom_backend
,
with_globals
=
with_globals
)
class
OpenClBackend
(
CudaBackend
):
...
...
pystencils/datahandling/pycuda.py
View file @
d1f332bd
...
...
@@ -13,26 +13,31 @@ class PyCudaArrayHandler:
import
pycuda.autoinit
# NOQA
def
zeros
(
self
,
shape
,
dtype
=
np
.
float64
,
order
=
'C'
):
return
gpuarray
.
zeros
(
shape
,
dtype
,
order
)
cpu_array
=
np
.
zeros
(
shape
=
shape
,
dtype
=
dtype
,
order
=
order
)
return
self
.
to_gpu
(
cpu_array
)
def
ones
(
self
,
shape
,
dtype
,
order
=
'C'
):
return
gpuarray
.
ones
(
shape
,
dtype
,
order
)
def
ones
(
self
,
shape
,
dtype
=
np
.
float64
,
order
=
'C'
):
cpu_array
=
np
.
ones
(
shape
=
shape
,
dtype
=
dtype
,
order
=
order
)
return
self
.
to_gpu
(
cpu_array
)
def
empty
(
self
,
shape
,
dtype
=
np
.
float64
,
layout
=
None
):
if
layout
:
cpu_array
=
pystencils
.
field
.
create_numpy_array_with_layout
(
shape
,
dtype
,
layout
)
cpu_array
=
pystencils
.
field
.
create_numpy_array_with_layout
(
shape
=
shape
,
dtype
=
dtype
,
layout
=
layout
)
return
self
.
to_gpu
(
cpu_array
)
else
:
return
gpuarray
.
empty
(
shape
,
dtype
)
def
to_gpu
(
self
,
array
):
@
staticmethod
def
to_gpu
(
array
):
return
gpuarray
.
to_gpu
(
array
)
def
upload
(
self
,
gpuarray
,
numpy_array
):
gpuarray
.
set
(
numpy_array
)
@
staticmethod
def
upload
(
array
,
numpy_array
):
array
.
set
(
numpy_array
)
def
download
(
self
,
gpuarray
,
numpy_array
):
gpuarray
.
get
(
numpy_array
)
@
staticmethod
def
download
(
array
,
numpy_array
):
array
.
get
(
numpy_array
)
def
randn
(
self
,
shape
,
dtype
=
np
.
float64
):
cpu_array
=
np
.
random
.
randn
(
*
shape
).
astype
(
dtype
)
...
...
pystencils/datahandling/pyopencl.py
View file @
d1f332bd
...
...
@@ -19,15 +19,17 @@ class PyOpenClArrayHandler:
self
.
queue
=
queue
def
zeros
(
self
,
shape
,
dtype
=
np
.
float64
,
order
=
'C'
):
return
gpuarray
.
zeros
(
shape
,
dtype
,
order
)
cpu_array
=
np
.
zeros
(
shape
=
shape
,
dtype
=
dtype
,
order
=
order
)
return
self
.
to_gpu
(
cpu_array
)
def
ones
(
self
,
shape
,
dtype
,
order
=
'C'
):
return
gpuarray
.
ones
(
self
.
queue
,
shape
,
dtype
,
order
)
def
ones
(
self
,
shape
,
dtype
=
np
.
float64
,
order
=
'C'
):
cpu_array
=
np
.
ones
(
shape
=
shape
,
dtype
=
dtype
,
order
=
order
)
return
self
.
to_gpu
(
cpu_array
)
def
empty
(
self
,
shape
,
dtype
=
np
.
float64
,
layout
=
None
):
if
layout
:
cpu_array
=
pystencils
.
field
.
create_numpy_array_with_layout
(
shape
,
dtype
,
layout
)
return
self
.
from_numpy
(
cpu_array
)
cpu_array
=
pystencils
.
field
.
create_numpy_array_with_layout
(
shape
=
shape
,
dtype
=
dtype
,
layout
=
layout
)
return
self
.
to_gpu
(
cpu_array
)
else
:
return
gpuarray
.
empty
(
self
.
queue
,
shape
,
dtype
)
...
...
pystencils/fd/derivation.py
View file @
d1f332bd
...
...
@@ -126,7 +126,6 @@ class FiniteDifferenceStencilDerivation:
def
isotropy_equations
(
self
,
order
):
def
cycle_int_sequence
(
sequence
,
modulus
):
import
numpy
as
np
result
=
[]
arr
=
np
.
array
(
sequence
,
dtype
=
int
)
while
True
:
...
...
@@ -200,7 +199,7 @@ class FiniteDifferenceStencilDerivation:
via rotation and apply them to a field."""
dim
=
len
(
self
.
stencil
[
0
])
assert
(
dim
==
2
or
dim
==
3
),
"This function is only for 2D or 3D stencils available"
rotated_weights
=
np
.
rot90
(
np
.
array
(
self
),
1
,
axes
)
rotated_weights
=
np
.
rot90
(
np
.
array
(
self
.
__array__
()
),
1
,
axes
)
result
=
[]
max_offset
=
max
(
max
(
abs
(
e
)
for
e
in
direction
)
for
direction
in
self
.
stencil
)
...
...
pystencils/fd/finitedifferences.py
View file @
d1f332bd
...
...
@@ -23,9 +23,9 @@ def diffusion(scalar, diffusion_coeff, idx=None):
>>> f = Field.create_generic('f', spatial_dimensions=2)
>>> d = sp.Symbol("d")
>>> dx = sp.Symbol("dx")
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=
sp.Symbol("d")
)
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=
d
)
>>> discretization = Discretization2ndOrder()
>>> expected_output = ((f[-1, 0] + f[0, -1] - 4
*
f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
>>> expected_output = ((f[-1, 0] + f[0, -1] - 4
*
f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
>>> sp.simplify(discretization(diffusion_term) - expected_output)
0
"""
...
...
@@ -79,13 +79,6 @@ class Discretization2ndOrder:
self
.
dt
=
dt
self
.
spatial_stencil
=
discretization_stencil_func
@
staticmethod
def
_diff_order
(
e
):
if
not
isinstance
(
e
,
Diff
):
return
0
else
:
return
1
+
Discretization2ndOrder
.
_diff_order
(
e
.
args
[
0
])
def
_discretize_diffusion
(
self
,
e
):
result
=
0
for
c
in
range
(
e
.
dim
):
...
...
@@ -121,29 +114,6 @@ class Discretization2ndOrder:
new_args
=
[
self
.
_discretize_spatial
(
a
)
for
a
in
e
.
args
]
return
e
.
func
(
*
new_args
)
if
new_args
else
e
def
_discretize_diff
(
self
,
e
):
order
=
self
.
_diff_order
(
e
)
if
order
==
1
:
fa
=
e
.
args
[
0
]
index
=
e
.
target
return
(
fa
.
neighbor
(
index
,
1
)
-
fa
.
neighbor
(
index
,
-
1
))
/
(
2
*
self
.
dx
)
elif
order
==
2
:
indices
=
sorted
([
e
.
target
,
e
.
args
[
0
].
target
])
fa
=
e
.
args
[
0
].
args
[
0
]
if
indices
[
0
]
==
indices
[
1
]
and
all
(
i
>=
0
for
i
in
indices
):
result
=
(
-
2
*
fa
+
fa
.
neighbor
(
indices
[
0
],
-
1
)
+
fa
.
neighbor
(
indices
[
0
],
+
1
))
elif
indices
[
0
]
==
indices
[
1
]:
result
=
0
for
d
in
range
(
fa
.
field
.
spatial_dimensions
):
result
+=
(
-
2
*
fa
+
fa
.
neighbor
(
d
,
-
1
)
+
fa
.
neighbor
(
d
,
+
1
))
else
:
assert
all
(
i
>=
0
for
i
in
indices
)
offsets
=
[(
1
,
1
),
[
-
1
,
1
],
[
1
,
-
1
],
[
-
1
,
-
1
]]
result
=
sum
(
o1
*
o2
*
fa
.
neighbor
(
indices
[
0
],
o1
).
neighbor
(
indices
[
1
],
o2
)
for
o1
,
o2
in
offsets
)
/
4
return
result
/
(
self
.
dx
**
2
)
else
:
raise
NotImplementedError
(
"Term contains derivatives of order > 2"
)
def
__call__
(
self
,
expr
):
if
isinstance
(
expr
,
list
):
return
[
self
(
e
)
for
e
in
expr
]
...
...
pystencils/fd/finitevolumes.py
View file @
d1f332bd
...
...
@@ -7,6 +7,25 @@ from collections import defaultdict
from
collections.abc
import
Iterable
def
get_access_and_direction
(
term
):
direction1
=
term
.
args
[
1
]
if
isinstance
(
term
.
args
[
0
],
ps
.
Field
.
Access
):
# first derivative
access
=
term
.
args
[
0
]
direction
=
(
direction1
,)
elif
isinstance
(
term
.
args
[
0
],
ps
.
fd
.
Diff
):
# nested derivative
if
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
fd
.
Diff
):
# third or higher derivative
raise
ValueError
(
"can only handle first and second derivatives"
)
elif
not
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
Field
.
Access
):
raise
ValueError
(
"can only handle derivatives of field accesses"
)
access
,
direction2
=
term
.
args
[
0
].
args
[:
2
]
direction
=
(
direction1
,
direction2
)
else
:
raise
NotImplementedError
(
f
"can only deal with derivatives of field accesses, "
f
"but not
{
type
(
term
.
args
[
0
])
}
; expansion of derivatives probably failed"
)
return
access
,
direction
class
FVM1stOrder
:
"""Finite-volume discretization
...
...
@@ -48,22 +67,7 @@ class FVM1stOrder:
avg
=
(
term
.
get_shifted
(
*
neighbor
)
+
term
)
*
sp
.
Rational
(
1
,
2
)
return
avg
elif
isinstance
(
term
,
ps
.
fd
.
Diff
):
direction1
=
term
.
args
[
1
]
if
isinstance
(
term
.
args
[
0
],
ps
.
Field
.
Access
):
# first derivative
access
=
term
.
args
[
0
]
direction
=
(
direction1
,)
elif
isinstance
(
term
.
args
[
0
],
ps
.
fd
.
Diff
):
# nested derivative
if
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
fd
.
Diff
):
# third or higher derivative
raise
ValueError
(
"can only handle first and second derivatives"
)
elif
not
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
Field
.
Access
):
raise
ValueError
(
"can only handle derivatives of field accesses"
)
access
,
direction2
=
term
.
args
[
0
].
args
[:
2
]
direction
=
(
direction1
,
direction2
)
else
:
raise
NotImplementedError
(
"can only deal with derivatives of field accesses, "
"but not {}; expansion of derivatives probably failed"
.
format
(
type
(
term
.
args
[
0
])))
access
,
direction
=
get_access_and_direction
(
term
)
fds
=
FDS
(
neighbor
,
access
.
field
.
spatial_dimensions
,
direction
)
return
fds
.
apply
(
access
)
...
...
@@ -103,22 +107,7 @@ class FVM1stOrder:
def
discretize
(
term
):
if
isinstance
(
term
,
ps
.
fd
.
Diff
):
direction1
=
term
.
args
[
1
]
if
isinstance
(
term
.
args
[
0
],
ps
.
Field
.
Access
):
# first derivative
access
=
term
.
args
[
0
]
direction
=
(
direction1
,)
elif
isinstance
(
term
.
args
[
0
],
ps
.
fd
.
Diff
):
# nested derivative
if
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
fd
.
Diff
):
# third or higher derivative
raise
ValueError
(
"can only handle first and second derivatives"
)
elif
not
isinstance
(
term
.
args
[
0
].
args
[
0
],
ps
.
Field
.
Access
):
raise
ValueError
(
"can only handle derivatives of field accesses"
)
access
,
direction2
=
term
.
args
[
0
].
args
[:
2
]
direction
=
(
direction1
,
direction2
)
else
:
raise
NotImplementedError
(
"can only deal with derivatives of field accesses, "
"but not {}; expansion of derivatives probably failed"
.
format
(
type
(
term
.
args
[
0
])))
access
,
direction
=
get_access_and_direction
(
term
)
if
self
.
dim
==
2
:
stencil
=
[
""
.
join
(
a
).
replace
(
" "
,
""
)
for
a
in
itertools
.
product
(
"NS "
,
"EW "
)
...
...
pystencils/gpucuda/cudajit.py
View file @
d1f332bd
import
numpy
as
np
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.backends.cbackend
import
get_headers
from
pystencils.backends.cuda_backend
import
generate_cuda
from
pystencils.data_types
import
StructType
from
pystencils.field
import
FieldType
from
pystencils.gpucuda.texture_utils
import
ndarray_to_tex
...
...
@@ -45,7 +46,7 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
code
=
includes
+
"
\n
"
code
+=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generate_c
(
kernel_function_node
,
dialect
=
'cuda'
,
custom_backend
=
custom_backend
))
code
+=
str
(
generate_c
uda
(
kernel_function_node
,
custom_backend
=
custom_backend
))
textures
=
set
(
d
.
interpolator
for
d
in
kernel_function_node
.
atoms
(
InterpolatorAccess
)
if
isinstance
(
d
.
interpolator
,
TextureCachedField
))
...
...
pystencils/gpucuda/texture_utils.py
View file @
d1f332bd
...
...
@@ -7,8 +7,6 @@
"""
"""
from
os.path
import
dirname
,
isdir
,
join
from
typing
import
Union
import
numpy
as
np
...
...
@@ -21,15 +19,6 @@ except Exception:
pass
def
pow_two_divider
(
n
):
if
n
==
0
:
return
0
divider
=
1
while
(
n
&
divider
)
==
0
:
divider
<<=
1
return
divider
def
ndarray_to_tex
(
tex_ref
,
# type: Union[cuda.TextureReference, cuda.SurfaceReference]
ndarray
,
address_mode
=
None
,
...
...
@@ -66,65 +55,3 @@ def ndarray_to_tex(tex_ref, # type: Union[cuda.TextureReference, cuda.SurfaceRe
if
not
read_as_integer
:
tex_ref
.
set_flags
(
tex_ref
.
get_flags
()
&
~
cuda
.
TRSF_READ_AS_INTEGER
)
def
prefilter_for_cubic_bspline
(
gpuarray
):
import
pycuda.autoinit
# NOQA
from
pycuda.compiler
import
SourceModule
ndim
=
gpuarray
.
ndim
assert
ndim
==
2
or
ndim
==
3
,
"Only 2d or 3d supported"
assert
isdir
(
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
)),
\
"Submodule CubicInterpolationCUDA does not exist"
nvcc_options
=
[
"-w"
,
"-std=c++11"
,
"-Wno-deprecated-gpu-targets"
]
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
)]
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
,
"internal"
)]
file_name
=
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
,
"cubicPrefilter%iD.cu"
%
ndim
)
with
open
(
file_name
)
as
file
:
code
=
file
.
read
()
mod
=
SourceModule
(
code
,
options
=
nvcc_options
)
if
ndim
==
2
:
height
,
width
=
gpuarray
.
shape
block
=
min
(
pow_two_divider
(
height
),
64
)
grid
=
height
//
block
func
=
mod
.
get_function
(
'SamplesToCoefficients2DXf'
)
func
(
gpuarray
,
np
.
uint32
(
gpuarray
.
strides
[
-
2
]),
*
(
np
.
uint32
(
r
)
for
r
in
reversed
(
gpuarray
.
shape
)),
block
=
(
block
,
1
,
1
),
grid
=
(
grid
,
1
,
1
))
block
=
min
(
pow_two_divider
(
width
),
64
)
grid
=
width
//
block
func
=
mod
.
get_function
(
'SamplesToCoefficients2DYf'
)
func
(
gpuarray
,
np
.
uint32
(
gpuarray
.
strides
[
-
2
]),
*
(
np
.
uint32
(
r
)
for
r
in
reversed
(
gpuarray
.
shape
)),
block
=
(
block
,
1
,
1
),
grid
=
(
grid
,
1
,
1
))
elif
ndim
==
3
:
depth
,
height
,
width
=
gpuarray
.
shape
dimX
=
min
(
min
(
pow_two_divider
(
width
),
pow_two_divider
(
height
)),
64
)
dimY
=
min
(
min
(
pow_two_divider
(
depth
),
pow_two_divider
(
height
)),
512
/
dimX
)
block
=
(
dimX
,
dimY
,
1
)
dimGridX
=
(
height
//
block
[
0
],
depth
//
block
[
1
],
1
)
dimGridY
=
(
width
//
block
[
0
],
depth
//
block
[
1
],
1
)
dimGridZ
=
(
width
//
block
[
0
],
height
//
block
[
1
],
1
)
func
=
mod
.
get_function
(
"SamplesToCoefficients3DXf"
)
func
(
gpuarray
,
np
.
uint32
(
gpuarray
.
strides
[
-
2
]),
*
(
np
.
uint32
(
r
)
for
r
in
reversed
(
gpuarray
.
shape
)),
block
=
block
,
grid
=
dimGridX
)
func
=
mod
.
get_function
(
"SamplesToCoefficients3DYf"
)
func
(
gpuarray
,
np
.
uint32
(
gpuarray
.
strides
[
-
2
]),
*
(
np
.
uint32
(
r
)
for
r
in
reversed
(
gpuarray
.
shape
)),
block
=
block
,
grid
=
dimGridY
)
func
=
mod
.
get_function
(
"SamplesToCoefficients3DZf"
)
func
(
gpuarray
,
np
.
uint32
(
gpuarray
.
strides
[
-
2
]),
*
(
np
.
uint32
(
r
)
for
r
in
reversed
(
gpuarray
.
shape
)),
block
=
block
,
grid
=
dimGridZ
)
pystencils/jupyter.py
View file @
d1f332bd
...
...
@@ -7,55 +7,7 @@ from IPython.display import HTML
import
pystencils.plot
as
plt
__all__
=
[
'log_progress'
,
'make_imshow_animation'
,
'display_animation'
,
'set_display_mode'
]
def
log_progress
(
sequence
,
every
=
None
,
size
=
None
,
name
=
'Items'
):
"""Copied from https://github.com/alexanderkuk/log-progress"""
from
ipywidgets
import
IntProgress
,
HTML
,
VBox
from
IPython.display
import
display
is_iterator
=
False
if
size
is
None
:
try
:
size
=
len
(
sequence
)
except
TypeError
:
is_iterator
=
True
if
size
is
not
None
:
if
every
is
None
:
if
size
<=
200
:
every
=
1
else
:
every
=
int
(
size
/
200
)
# every 0.5%
else
:
assert
every
is
not
None
,
'sequence is iterator, set every'
if
is_iterator
:
progress
=
IntProgress
(
min
=
0
,
max
=
1
,
value
=
1
)
progress
.
bar_style
=
'info'
else
:
progress
=
IntProgress
(
min
=
0
,
max
=
size
,
value
=
0
)
label
=
HTML
()
box
=
VBox
(
children
=
[
label
,
progress
])
display
(
box
)
index
=
0
try
:
for
index
,
record
in
enumerate
(
sequence
,
1
):
if
index
==
1
or
index
%
every
==
0
:
if
is_iterator
:
label
.
value
=
f
'
{
name
}
:
{
index
}
/ ?'
else
:
progress
.
value
=
index
label
.
value
=
f
'
{
name
}
:
{
index
}
/
{
size
}
'
yield
record
except
:
progress
.
bar_style
=
'danger'
raise
else
:
progress
.
bar_style
=
'success'
progress
.
value
=
index
label
.
value
=
f
"
{
name
}
:
{
str
(
index
or
'?'
)
}
"
__all__
=
[
'make_imshow_animation'
,
'display_animation'
,
'set_display_mode'
]
VIDEO_TAG
=
"""<video controls width="80%">
...
...
pystencils/opencl/opencljit.py
View file @
d1f332bd
import
numpy
as
np
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.backends.cbackend
import
get_headers
from
pystencils.backends.opencl_backend
import
generate_opencl
from
pystencils.gpucuda.cudajit
import
_build_numpy_argument_list
,
_check_arguments
from
pystencils.include
import
get_pystencils_include_path
from
pystencils.kernel_wrapper
import
KernelWrapper
...
...
@@ -91,7 +92,7 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
code
=
includes
+
"
\n
"
code
+=
"#define FUNC_PREFIX __kernel
\n
"
code
+=
"#define RESTRICT restrict
\n\n
"
code
+=
str
(
generate_
c
(
kernel_function_node
,
dialect
=
'opencl'
,
custom_backend
=
custom_backend
))
code
+=
str
(
generate_
opencl
(
kernel_function_node
,
custom_backend
=
custom_backend
))
options
=
[]
if
USE_FAST_MATH
:
options
.
append
(
"-cl-unsafe-math-optimizations"
)
...
...
pystencils_tests/test_datahandling.py
View file @
d1f332bd
...
...
@@ -6,6 +6,8 @@ import numpy as np
import
pystencils
as
ps
from
pystencils
import
create_data_handling
,
create_kernel
from
pystencils.datahandling.pycuda
import
PyCudaArrayHandler
from
pystencils.datahandling.pyopencl
import
PyOpenClArrayHandler
try
:
import
pytest
...
...
@@ -355,3 +357,36 @@ def test_load_data():
assert
np
.
all
(
dh
.
cpu_arrays
[
'src'
])
==
0
assert
np
.
all
(
dh
.
cpu_arrays
[
'dst'
])
==
0
assert
np
.
all
(
dh
.
cpu_arrays
[
'dst2'
])
==
0
@
pytest
.
mark
.
parametrize
(
'target'
,
(
'gpu'
,
'opencl'
))
def
test_array_handler
(
target
):
size
=
(
2
,
2
)
if
target
==
'gpu'
:
array_handler
=
PyCudaArrayHandler
()
if
target
==
'opencl'
:
pytest
.
importorskip
(
'pyopencl'
)
import
pyopencl
as
cl
from
pystencils.opencl.opencljit
import
init_globally
init_globally
()
ctx
=
cl
.
create_some_context
(
0
)
queue
=
cl
.
CommandQueue
(
ctx
)
array_handler
=
PyOpenClArrayHandler
(
queue
)
zero_array
=
array_handler
.
zeros
(
size
)
cpu_array
=
np
.
empty
(
size
)
array_handler
.
download
(
zero_array
,
cpu_array
)
assert
np
.
all
(
cpu_array
)
==
0
ones_array
=
array_handler
.
ones
(
size
)
cpu_array
=
np
.
empty
(
size
)