Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexander Reinauer
pystencils
Commits
33aa3bdd
Commit
33aa3bdd
authored
Nov 25, 2019
by
Martin Bauer
Browse files
Major data handling and kernel target refactoring
- gpu -> accelerator - no distinction between and target and backend any more
parent
85484500
Changes
16
Hide whitespace changes
Inline
Side-by-side
pystencils/astnodes.py
View file @
33aa3bdd
...
...
@@ -164,31 +164,20 @@ class KernelFunction(Node):
def
field_name
(
self
):
return
self
.
fields
[
0
].
name
def
__init__
(
self
,
body
,
target
,
backend
,
compile_function
,
ghost_layers
,
function_name
=
"kernel"
):
def
__init__
(
self
,
body
,
target
,
compile_function
,
ghost_layers
,
function_name
=
"kernel"
):
super
(
KernelFunction
,
self
).
__init__
()
self
.
_body
=
body
body
.
parent
=
self
self
.
function_name
=
function_name
self
.
_body
.
parent
=
self
self
.
ghost_layers
=
ghost_layers
self
.
_target
=
target
self
.
_backend
=
backend
self
.
target
=
target
# these variables are assumed to be global, so no automatic parameter is generated for them
self
.
global_variables
=
set
()
self
.
instruction_set
=
None
# used in `vectorize` function to tell the backend which i.s. (SSE,AVX) to use
# function that compiles the node to a Python callable, is set by the backends
self
.
_compile_function
=
compile_function
@
property
def
target
(
self
):
"""Currently either 'cpu' or 'gpu' """
return
self
.
_target
@
property
def
backend
(
self
):
"""Backend for generating the code e.g. 'llvm', 'c', 'cuda' """
return
self
.
_backend
@
property
def
symbols_defined
(
self
):
return
set
()
...
...
pystencils/boundaries/boundaryhandling.py
View file @
33aa3bdd
...
...
@@ -43,7 +43,7 @@ class FlagInterface:
raise
ValueError
(
"There is already a boundary handling registered at the data handling."
"If you want to add multiple handling objects, choose a different name."
)
self
.
flag_field
=
data_handling
.
add_array
(
self
.
flag_field_name
,
dtype
=
self
.
dtype
,
cpu
=
True
,
gpu
=
False
)
self
.
flag_field
=
data_handling
.
add_array
(
self
.
flag_field_name
,
dtype
=
self
.
dtype
,
cpu
=
True
,
acc
=
False
)
ff_ghost_layers
=
data_handling
.
ghost_layers_of_field
(
self
.
flag_field_name
)
for
b
in
data_handling
.
iterate
(
ghost_layers
=
ff_ghost_layers
):
b
[
self
.
flag_field_name
].
fill
(
self
.
domain_flag
)
...
...
@@ -87,26 +87,23 @@ class BoundaryHandling:
fi
=
flag_interface
self
.
flag_interface
=
fi
if
fi
is
not
None
else
FlagInterface
(
data_handling
,
name
+
"Flags"
)
gpu
=
self
.
_target
in
self
.
_data_handling
.
_GPU_LIKE_TARGETS
class_
=
self
.
IndexFieldBlockData
if
self
.
_target
==
'opencl'
:
def
opencl_to_device
(
gpu_version
,
cpu_version
):
from
pyopencl
import
array
gpu_version
=
gpu_version
.
boundary_object_to_index_list
cpu_version
=
cpu_version
.
boundary_object_to_index_list
for
obj
,
cpu_arr
in
cpu_version
.
items
():
if
obj
not
in
gpu_version
or
gpu_version
[
obj
].
shape
!=
cpu_arr
.
shape
:
from
pystencils.opencl.opencljit
import
get_global_cl_queue
queue
=
self
.
_data_handling
.
_opencl_queue
or
get_global_cl_queue
()
gpu_version
[
obj
]
=
array
.
to_device
(
queue
,
cpu_arr
)
else
:
gpu_version
[
obj
].
set
(
cpu_arr
)
class_
=
type
(
'opencl_class'
,
(
self
.
IndexFieldBlockData
,),
{
'to_gpu'
:
opencl_to_device
})
data_handling
.
add_custom_class
(
self
.
_index_array_name
,
class_
,
cpu
=
True
,
gpu
=
gpu
)
def
to_cpu
(
gpu_version
,
cpu_version
):
gpu_version
=
gpu_version
.
boundary_object_to_index_list
cpu_version
=
cpu_version
.
boundary_object_to_index_list
for
obj
,
cpu_arr
in
cpu_version
.
items
():
gpu_version
[
obj
].
get
(
cpu_arr
)
def
to_acc
(
gpu_version
,
cpu_version
):
gpu_version
=
gpu_version
.
boundary_object_to_index_list
cpu_version
=
cpu_version
.
boundary_object_to_index_list
for
obj
,
cpu_arr
in
cpu_version
.
items
():
if
obj
not
in
gpu_version
or
gpu_version
[
obj
].
shape
!=
cpu_arr
.
shape
:
gpu_version
[
obj
]
=
self
.
data_handling
.
array_handler
.
to_gpu
(
cpu_arr
)
else
:
self
.
data_handling
.
array_handler
.
upload
(
gpu_version
[
obj
],
cpu_arr
)
creation_function
=
lambda
:
self
.
IndexFieldBlockData
()
data_handling
.
add_custom_data
(
self
.
_index_array_name
,
creation_function
,
creation_function
,
to_acc
,
to_cpu
)
@
property
def
data_handling
(
self
):
...
...
@@ -222,7 +219,7 @@ class BoundaryHandling:
if
self
.
_dirty
:
self
.
prepare
()
for
b
in
self
.
_data_handling
.
iterate
(
gpu
=
self
.
_target
in
self
.
_data_handling
.
_GPU_LIKE
_TARGETS
):
for
b
in
self
.
_data_handling
.
iterate
(
acc
=
self
.
_target
in
self
.
_data_handling
.
ACCELERATOR
_TARGETS
):
for
b_obj
,
idx_arr
in
b
[
self
.
_index_array_name
].
boundary_object_to_index_list
.
items
():
kwargs
[
self
.
_field_name
]
=
b
[
self
.
_field_name
]
kwargs
[
'indexField'
]
=
idx_arr
...
...
@@ -237,7 +234,7 @@ class BoundaryHandling:
if
self
.
_dirty
:
self
.
prepare
()
for
b
in
self
.
_data_handling
.
iterate
(
gpu
=
self
.
_target
in
self
.
_data_handling
.
_GPU_LIKE
_TARGETS
):
for
b
in
self
.
_data_handling
.
iterate
(
acc
=
self
.
_target
in
self
.
_data_handling
.
ACCELERATOR
_TARGETS
):
for
b_obj
,
idx_arr
in
b
[
self
.
_index_array_name
].
boundary_object_to_index_list
.
items
():
arguments
=
kwargs
.
copy
()
arguments
[
self
.
_field_name
]
=
b
[
self
.
_field_name
]
...
...
@@ -320,8 +317,8 @@ class BoundaryHandling:
def
_boundary_data_initialization
(
self
,
boundary_obj
,
boundary_data_setter
,
**
kwargs
):
if
boundary_obj
.
additional_data_init_callback
:
boundary_obj
.
additional_data_init_callback
(
boundary_data_setter
,
**
kwargs
)
if
self
.
_target
in
self
.
_data_handling
.
_GPU_LIKE
_TARGETS
:
self
.
_data_handling
.
to_
gpu
(
self
.
_index_array_name
)
if
self
.
_target
in
self
.
_data_handling
.
ACCELERATOR
_TARGETS
:
self
.
_data_handling
.
to_
acc
(
self
.
_index_array_name
)
class
BoundaryInfo
(
object
):
def
__init__
(
self
,
boundary_obj
,
flag
,
kernel
):
...
...
@@ -330,7 +327,7 @@ class BoundaryHandling:
self
.
kernel
=
kernel
class
IndexFieldBlockData
:
def
__init__
(
self
,
*
_1
,
**
_2
):
def
__init__
(
self
):
self
.
boundary_object_to_index_list
=
{}
self
.
boundary_object_to_data_setter
=
{}
...
...
@@ -338,25 +335,6 @@ class BoundaryHandling:
self
.
boundary_object_to_index_list
.
clear
()
self
.
boundary_object_to_data_setter
.
clear
()
@
staticmethod
def
to_cpu
(
gpu_version
,
cpu_version
):
gpu_version
=
gpu_version
.
boundary_object_to_index_list
cpu_version
=
cpu_version
.
boundary_object_to_index_list
for
obj
,
cpu_arr
in
cpu_version
.
items
():
gpu_version
[
obj
].
get
(
cpu_arr
)
@
staticmethod
def
to_gpu
(
gpu_version
,
cpu_version
):
from
pycuda
import
gpuarray
gpu_version
=
gpu_version
.
boundary_object_to_index_list
cpu_version
=
cpu_version
.
boundary_object_to_index_list
for
obj
,
cpu_arr
in
cpu_version
.
items
():
if
obj
not
in
gpu_version
or
gpu_version
[
obj
].
shape
!=
cpu_arr
.
shape
:
gpu_version
[
obj
]
=
gpuarray
.
to_gpu
(
cpu_arr
)
else
:
gpu_version
[
obj
].
set
(
cpu_arr
)
class
BoundaryDataSetter
:
def
__init__
(
self
,
index_array
,
offset
,
stencil
,
ghost_layers
,
pdf_array
):
...
...
pystencils/cpu/kernelcreation.py
View file @
33aa3bdd
...
...
@@ -64,7 +64,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
loop_order
=
get_optimal_loop_ordering
(
fields_without_buffers
)
loop_node
,
ghost_layer_info
=
make_loop_over_domain
(
body
,
iteration_slice
=
iteration_slice
,
ghost_layers
=
ghost_layers
,
loop_order
=
loop_order
)
ast_node
=
KernelFunction
(
loop_node
,
'cpu'
,
'c'
,
compile_function
=
make_python_function
,
ast_node
=
KernelFunction
(
loop_node
,
'cpu'
,
compile_function
=
make_python_function
,
ghost_layers
=
ghost_layer_info
,
function_name
=
function_name
)
implement_interpolations
(
body
)
...
...
@@ -145,7 +145,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body
.
append
(
assignment
)
function_body
=
Block
([
loop_node
])
ast_node
=
KernelFunction
(
function_body
,
"cpu"
,
"c"
,
make_python_function
,
ast_node
=
KernelFunction
(
function_body
,
"cpu"
,
make_python_function
,
ghost_layers
=
None
,
function_name
=
function_name
)
fixed_coordinate_mapping
=
{
f
.
name
:
coordinate_typed_symbols
for
f
in
non_index_fields
}
...
...
pystencils/datahandling/__init__.py
View file @
33aa3bdd
...
...
@@ -18,10 +18,9 @@ except ImportError:
def
create_data_handling
(
domain_size
:
Tuple
[
int
,
...],
periodicity
:
Union
[
bool
,
Tuple
[
bool
,
...]]
=
False
,
default_layout
:
str
=
'SoA'
,
default_
target
:
str
=
'cpu'
,
target
:
str
=
'cpu'
,
parallel
:
bool
=
False
,
default_ghost_layers
:
int
=
1
,
opencl_queue
=
None
)
->
DataHandling
:
default_ghost_layers
:
int
=
1
)
->
DataHandling
:
"""Creates a data handling instance.
Args:
...
...
@@ -29,12 +28,11 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity
for each coordinate
default_layout: default array layout, that is used if not explicitly specified in 'add_array'
default_target: either 'cpu' or 'gpu
'
target: target where code should be run, e.g. 'cpu' or 'cuda' or 'opencl
'
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
"""
if
parallel
:
assert
not
opencl_queue
,
"OpenCL is only supported for SerialDataHandling"
if
wlb
is
None
:
raise
ValueError
(
"Cannot create parallel data handling because walberla module is not available"
)
...
...
@@ -55,15 +53,14 @@ def create_data_handling(domain_size: Tuple[int, ...],
# noinspection PyArgumentList
block_storage
=
wlb
.
createUniformBlockGrid
(
cells
=
domain_size
,
periodic
=
periodicity
)
return
ParallelDataHandling
(
blocks
=
block_storage
,
dim
=
dim
,
default_target
=
default_
target
,
return
ParallelDataHandling
(
blocks
=
block_storage
,
dim
=
dim
,
target
=
target
,
default_layout
=
default_layout
,
default_ghost_layers
=
default_ghost_layers
)
else
:
return
SerialDataHandling
(
domain_size
,
periodicity
=
periodicity
,
default_target
=
default_
target
,
target
=
target
,
default_layout
=
default_layout
,
default_ghost_layers
=
default_ghost_layers
,
opencl_queue
=
opencl_queue
)
default_ghost_layers
=
default_ghost_layers
)
__all__
=
[
'create_data_handling'
]
pystencils/datahandling/datahandling_interface.py
View file @
33aa3bdd
...
...
@@ -16,8 +16,7 @@ class DataHandling(ABC):
'gather' function that has collects (parts of the) distributed data on a single process.
"""
_GPU_LIKE_TARGETS
=
[
'gpu'
,
'opencl'
]
_GPU_LIKE_BACKENDS
=
[
'gpucuda'
,
'opencl'
]
ACCELERATOR_TARGETS
=
[
'cuda'
,
'opencl'
,
'llvm_gpu'
]
# ---------------------------- Adding and accessing data -----------------------------------------------------------
...
...
@@ -39,7 +38,7 @@ class DataHandling(ABC):
@
abstractmethod
def
add_array
(
self
,
name
:
str
,
values_per_cell
,
dtype
=
np
.
float64
,
latex_name
:
Optional
[
str
]
=
None
,
ghost_layers
:
Optional
[
int
]
=
None
,
layout
:
Optional
[
str
]
=
None
,
cpu
:
bool
=
True
,
gpu
:
Optional
[
bool
]
=
None
,
alignment
=
False
,
field_type
=
FieldType
.
GENERIC
)
->
Field
:
cpu
:
bool
=
True
,
acc
:
Optional
[
bool
]
=
None
,
alignment
=
False
,
field_type
=
FieldType
.
GENERIC
)
->
Field
:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary
...
...
@@ -56,8 +55,11 @@ class DataHandling(ABC):
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
acc: allocate field on an accelerator, if an accelerator target has been selected
if None, an accelerator field is allocated only if the target selected when creating the data handling
is not 'cpu'
alignment: either False for no alignment, or the number of bytes to align to
field_type: change from generic to staggered or absolutely accessed fields, see field documentation
Returns:
pystencils field, that can be used to formulate symbolic kernels
"""
...
...
@@ -67,7 +69,7 @@ class DataHandling(ABC):
"""Returns true if a field or custom data element with this name was added."""
@
abstractmethod
def
add_array_like
(
self
,
name
,
name_of_template_field
,
latex_name
=
None
,
cpu
=
True
,
gpu
=
None
):
def
add_array_like
(
self
,
name
,
name_of_template_field
,
latex_name
=
None
,
cpu
=
True
,
acc
=
None
):
"""
Adds an array with the same parameters (number of ghost layers, values_per_cell, dtype) as existing array.
...
...
@@ -76,33 +78,33 @@ class DataHandling(ABC):
name_of_template_field: name of array that is used as template
latex_name: see 'add' method
cpu: see 'add' method
gpu
: see 'add' method
acc
: see 'add' method
"""
@
abstractmethod
def
add_custom_data
(
self
,
name
:
str
,
cpu_creation_function
,
gpu
_creation_function
=
None
,
cpu_to_
gpu
_transfer_func
=
None
,
gpu
_to_cpu_transfer_func
=
None
):
acc
_creation_function
=
None
,
cpu_to_
acc
_transfer_func
=
None
,
acc
_to_cpu_transfer_func
=
None
):
"""Adds custom (non-array) data to domain.
Args:
name: name to access data
cpu_creation_function: function returning a new instance of the data that should be stored
gpu
_creation_function: optional, function returning a new instance, stored on
GPU
cpu_to_
gpu
_transfer_func: function that transfers cpu to
gpu
version,
getting two parameters (
gpu
_instance, cpu_instance)
gpu
_to_cpu_transfer_func: function that transfers
gpu
to cpu version, getting two parameters
(
gpu
_instance, cpu_instance)
acc
_creation_function: optional, function returning a new instance, stored on
the accelerator
cpu_to_
acc
_transfer_func: function that transfers cpu to
accelerator
version,
getting two parameters (
acc
_instance, cpu_instance)
acc
_to_cpu_transfer_func: function that transfers
accelerator
to cpu version, getting two parameters
(
acc
_instance, cpu_instance)
"""
def
add_custom_class
(
self
,
name
:
str
,
class_obj
,
cpu
:
bool
=
True
,
gpu
:
bool
=
False
):
"""Adds non-array data by passing a class object with optional 'to_
gpu
' and 'to_cpu' member functions."""
cpu_to_
gpu
_transfer_func
=
class_obj
.
to_
gpu
if
cpu
and
gpu
and
hasattr
(
class_obj
,
'to_
gpu
'
)
else
None
gpu
_to_cpu_transfer_func
=
class_obj
.
to_cpu
if
cpu
and
gpu
and
hasattr
(
class_obj
,
'to_cpu'
)
else
None
def
add_custom_class
(
self
,
name
:
str
,
class_obj
,
cpu
:
bool
=
True
,
acc
:
bool
=
False
):
"""Adds non-array data by passing a class object with optional 'to_
acc
' and 'to_cpu' member functions."""
cpu_to_
acc
_transfer_func
=
class_obj
.
to_
acc
if
cpu
and
acc
and
hasattr
(
class_obj
,
'to_
acc
'
)
else
None
acc
_to_cpu_transfer_func
=
class_obj
.
to_cpu
if
cpu
and
acc
and
hasattr
(
class_obj
,
'to_cpu'
)
else
None
self
.
add_custom_data
(
name
,
cpu_creation_function
=
class_obj
if
cpu
else
None
,
gpu
_creation_function
=
class_obj
if
gpu
else
None
,
cpu_to_
gpu
_transfer_func
=
cpu_to_
gpu
_transfer_func
,
gpu
_to_cpu_transfer_func
=
gpu
_to_cpu_transfer_func
)
acc
_creation_function
=
class_obj
if
acc
else
None
,
cpu_to_
acc
_transfer_func
=
cpu_to_
acc
_transfer_func
,
acc
_to_cpu_transfer_func
=
acc
_to_cpu_transfer_func
)
@
property
@
abstractmethod
...
...
@@ -128,7 +130,7 @@ class DataHandling(ABC):
"""Returns values_per_cell of array."""
@
abstractmethod
def
iterate
(
self
,
slice_obj
=
None
,
gpu
=
False
,
ghost_layers
=
None
,
def
iterate
(
self
,
slice_obj
=
None
,
acc
=
False
,
ghost_layers
=
None
,
inner_ghost_layers
=
True
)
->
Iterable
[
'Block'
]:
"""Iterate over local part of potentially distributed data structure."""
...
...
@@ -157,32 +159,32 @@ class DataHandling(ABC):
"""
@
abstractmethod
def
swap
(
self
,
name1
,
name2
,
gpu
=
False
):
def
swap
(
self
,
name1
,
name2
,
acc
=
False
):
"""Swaps data of two arrays"""
# ------------------------------- CPU/
GPU
transfer -----------------------------------------------------------------
# ------------------------------- CPU/
ACC
transfer -----------------------------------------------------------------
@
abstractmethod
def
to_cpu
(
self
,
name
):
"""Copies
GPU
data of array with specified name to CPU.
Works only if 'cpu=True' and '
gpu
=True' has been used in 'add' method."""
"""Copies
accelerator
data of array with specified name to CPU.
Works only if 'cpu=True' and '
acc
=True' has been used in 'add' method."""
@
abstractmethod
def
to_
gpu
(
self
,
name
):
"""Copies
GPU
data of array with specified name to
GPU
.
Works only if 'cpu=True' and '
gpu
=True' has been used in 'add' method."""
def
to_
acc
(
self
,
name
):
"""Copies
accelerator
data of array with specified name to
accelerator
.
Works only if 'cpu=True' and '
acc
=True' has been used in 'add' method."""
@
abstractmethod
def
all_to_cpu
(
self
):
"""Copies data from
GPU
to CPU for all arrays that have a CPU and a
GPU
representation."""
"""Copies data from
accelerator
to CPU for all arrays that have a CPU and a
n accelerator
representation."""
@
abstractmethod
def
all_to_
gpu
(
self
):
"""Copies data from CPU to
GPU
for all arrays that have a CPU and a
GPU
representation."""
def
all_to_
acc
(
self
):
"""Copies data from CPU to
accelerator
for all arrays that have a CPU and a
accelerator
representation."""
@
abstractmethod
def
is_on_
gpu
(
self
,
name
):
"""Checks if this data was also allocated on the
GPU
- does not check if this data item is in synced."""
def
is_on_
acc
(
self
,
name
):
"""Checks if this data was also allocated on the
accelerator
- does not check if this data item is in synced."""
@
abstractmethod
def
create_vtk_writer
(
self
,
file_name
,
data_names
,
ghost_layers
=
False
)
->
Callable
[[
int
],
None
]:
...
...
@@ -216,7 +218,7 @@ class DataHandling(ABC):
# ------------------------------- Communication --------------------------------------------------------------------
@
abstractmethod
def
synchronization_function
(
self
,
names
,
stencil
=
None
,
target
=
None
,
**
kwargs
)
->
Callable
[[],
None
]:
def
synchronization_function
(
self
,
names
,
stencil
=
None
,
acc
=
None
,
**
kwargs
)
->
Callable
[[],
None
]:
"""Synchronizes ghost layers for distributed arrays.
For serial scenario this has to be called for correct periodicity handling
...
...
@@ -225,8 +227,9 @@ class DataHandling(ABC):
names: what data to synchronize: name of array or sequence of names
stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
target: either 'cpu' or 'gpu
kwargs: implementation specific, optional optimization parameters for communication
acc: synchronize data on accelerator, if None use accelerator when target at construction is an
accelerator target
kwargs: implementation specific
Returns:
function object to run the communication
...
...
pystencils/datahandling/parallel_datahandling.py
View file @
33aa3bdd
...
...
@@ -16,7 +16,7 @@ class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX
=
"gpu_"
VTK_COUNTER
=
0
def
__init__
(
self
,
blocks
,
default_ghost_layers
=
1
,
default_layout
=
'SoA'
,
dim
=
3
,
default_
target
=
'cpu'
):
def
__init__
(
self
,
blocks
,
default_ghost_layers
=
1
,
default_layout
=
'SoA'
,
dim
=
3
,
target
=
'cpu'
):
"""
Creates data handling based on walberla block storage
...
...
@@ -27,8 +27,7 @@ class ParallelDataHandling(DataHandling):
dim: dimension of scenario,
walberla always uses three dimensions, so if dim=2 the extend of the
z coordinate of blocks has to be 1
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
if not overwritten in add_array, and synchronization functions are for the GPU by default
target: either 'cpu' or 'cuda', other targets are not supported in parallel setup
"""
super
(
ParallelDataHandling
,
self
).
__init__
()
assert
dim
in
(
2
,
3
)
...
...
@@ -52,7 +51,8 @@ class ParallelDataHandling(DataHandling):
if
self
.
_dim
==
2
:
assert
self
.
blocks
.
getDomainCellBB
().
size
[
2
]
==
1
self
.
default_target
=
default_target
assert
target
in
(
'cpu'
,
'cuda'
),
"ParallelDataHandling only support 'cpu' and 'cuda' target"
self
.
target
=
target
@
property
def
dim
(
self
):
...
...
@@ -77,24 +77,24 @@ class ParallelDataHandling(DataHandling):
return
self
.
_fieldInformation
[
name
][
'values_per_cell'
]
def
add_custom_data
(
self
,
name
,
cpu_creation_function
,
gpu
_creation_function
=
None
,
cpu_to_
gpu
_transfer_func
=
None
,
gpu
_to_cpu_transfer_func
=
None
):
if
cpu_creation_function
and
gpu
_creation_function
:
if
cpu_to_
gpu
_transfer_func
is
None
or
gpu
_to_cpu_transfer_func
is
None
:
acc
_creation_function
=
None
,
cpu_to_
acc
_transfer_func
=
None
,
acc
_to_cpu_transfer_func
=
None
):
if
cpu_creation_function
and
acc
_creation_function
:
if
cpu_to_
acc
_transfer_func
is
None
or
acc
_to_cpu_transfer_func
is
None
:
raise
ValueError
(
"For GPU data, both transfer functions have to be specified"
)
self
.
_custom_data_transfer_functions
[
name
]
=
(
cpu_to_
gpu
_transfer_func
,
gpu
_to_cpu_transfer_func
)
self
.
_custom_data_transfer_functions
[
name
]
=
(
cpu_to_
acc
_transfer_func
,
acc
_to_cpu_transfer_func
)
if
cpu_creation_function
:
self
.
blocks
.
addBlockData
(
name
,
cpu_creation_function
)
if
gpu
_creation_function
:
self
.
blocks
.
addBlockData
(
self
.
GPU_DATA_PREFIX
+
name
,
gpu
_creation_function
)
if
acc
_creation_function
:
self
.
blocks
.
addBlockData
(
self
.
GPU_DATA_PREFIX
+
name
,
acc
_creation_function
)
self
.
_custom_data_names
.
append
(
name
)
def
add_array
(
self
,
name
,
values_per_cell
=
1
,
dtype
=
np
.
float64
,
latex_name
=
None
,
ghost_layers
=
None
,
layout
=
None
,
cpu
=
True
,
gpu
=
None
,
alignment
=
False
,
field_type
=
FieldType
.
GENERIC
):
layout
=
None
,
cpu
=
True
,
acc
=
None
,
alignment
=
False
,
field_type
=
FieldType
.
GENERIC
):
if
ghost_layers
is
None
:
ghost_layers
=
self
.
default_ghost_layers
if
gpu
is
None
:
gpu
=
self
.
default_
target
==
'
gpu
'
if
acc
is
None
:
acc
=
self
.
target
==
'
cuda
'
if
layout
is
None
:
layout
=
self
.
default_layout
if
len
(
self
.
blocks
)
==
0
:
...
...
@@ -122,13 +122,13 @@ class ParallelDataHandling(DataHandling):
if
cpu
:
wlb
.
field
.
addToStorage
(
self
.
blocks
,
name
,
dtype
,
fSize
=
values_per_cell
,
layout
=
layout_map
[
layout
],
ghostLayers
=
ghost_layers
,
alignment
=
alignment
)
if
gpu
:
if
acc
:
if
alignment
!=
0
:
raise
ValueError
(
"Alignment for walberla GPU fields not yet supported"
)
wlb
.
cuda
.
addGpuFieldToStorage
(
self
.
blocks
,
self
.
GPU_DATA_PREFIX
+
name
,
dtype
,
fSize
=
values_per_cell
,
usePitchedMem
=
False
,
ghostLayers
=
ghost_layers
,
layout
=
layout_map
[
layout
])
if
cpu
and
gpu
:
if
cpu
and
acc
:
self
.
_cpu_gpu_pairs
.
append
((
name
,
self
.
GPU_DATA_PREFIX
+
name
))
block_bb
=
self
.
blocks
.
getBlockCellBB
(
self
.
blocks
[
0
])
...
...
@@ -144,7 +144,7 @@ class ParallelDataHandling(DataHandling):
field_type
=
field_type
)
self
.
fields
[
name
].
latex_name
=
latex_name
self
.
_field_name_to_cpu_data_name
[
name
]
=
name
if
gpu
:
if
acc
:
self
.
_field_name_to_gpu_data_name
[
name
]
=
self
.
GPU_DATA_PREFIX
+
name
return
self
.
fields
[
name
]
...
...
@@ -159,18 +159,18 @@ class ParallelDataHandling(DataHandling):
def
custom_data_names
(
self
):
return
tuple
(
self
.
_custom_data_names
)
def
add_array_like
(
self
,
name
,
name_of_template_field
,
latex_name
=
None
,
cpu
=
True
,
gpu
=
None
):
return
self
.
add_array
(
name
,
latex_name
=
latex_name
,
cpu
=
cpu
,
gpu
=
gpu
,
def
add_array_like
(
self
,
name
,
name_of_template_field
,
latex_name
=
None
,
cpu
=
True
,
acc
=
None
):
return
self
.
add_array
(
name
,
latex_name
=
latex_name
,
cpu
=
cpu
,
acc
=
acc
,
**
self
.
_fieldInformation
[
name_of_template_field
])
def
swap
(
self
,
name1
,
name2
,
gpu
=
False
):
if
gpu
:
def
swap
(
self
,
name1
,
name2
,
acc
=
False
):
if
acc
:
name1
=
self
.
GPU_DATA_PREFIX
+
name1
name2
=
self
.
GPU_DATA_PREFIX
+
name2
for
block
in
self
.
blocks
:
block
[
name1
].
swapDataPointers
(
block
[
name2
])
def
iterate
(
self
,
slice_obj
=
None
,
gpu
=
False
,
ghost_layers
=
True
,
inner_ghost_layers
=
True
):
def
iterate
(
self
,
slice_obj
=
None
,
acc
=
False
,
ghost_layers
=
True
,
inner_ghost_layers
=
True
):
if
ghost_layers
is
True
:
ghost_layers
=
self
.
default_ghost_layers
elif
ghost_layers
is
False
:
...
...
@@ -185,7 +185,7 @@ class ParallelDataHandling(DataHandling):
elif
isinstance
(
ghost_layers
,
str
):
ghost_layers
=
self
.
ghost_layers_of_field
(
ghost_layers
)
prefix
=
self
.
GPU_DATA_PREFIX
if
gpu
else
""
prefix
=
self
.
GPU_DATA_PREFIX
if
acc
else
""
if
slice_obj
is
not
None
:
yield
from
sliced_block_iteration
(
self
.
blocks
,
slice_obj
,
inner_ghost_layers
,
ghost_layers
,
self
.
dim
,
prefix
)
...
...
@@ -229,7 +229,8 @@ class ParallelDataHandling(DataHandling):
kernel_function
(
**
arg_dict
)
def
get_kernel_kwargs
(
self
,
kernel_function
,
**
kwargs
):
if
kernel_function
.
ast
.
backend
==
'gpucuda'
:
if
kernel_function
.
ast
.
target
in
self
.
ACCELERATOR_TARGETS
:
assert
kernel_function
.
ast
.
target
==
'cuda'
,
'ParallelDataHandling only supports CUDA and CPU'
name_map
=
self
.
_field_name_to_gpu_data_name
to_array
=
wlb
.
cuda
.
toGpuArray
else
:
...
...
@@ -258,7 +259,7 @@ class ParallelDataHandling(DataHandling):
else
:
wlb
.
cuda
.
copyFieldToCpu
(
self
.
blocks
,
self
.
GPU_DATA_PREFIX
+
name
,
name
)
def
to_
gpu
(
self
,
name
):
def
to_
acc
(
self
,
name
):
if
name
in
self
.
_custom_data_transfer_functions
:
transfer_func
=
self
.
_custom_data_transfer_functions
[
name
][
0
]
for
block
in
self
.
blocks
:
...
...
@@ -266,7 +267,7 @@ class ParallelDataHandling(DataHandling):
else
:
wlb
.
cuda
.
copyFieldToGpu
(
self
.
blocks
,
self
.
GPU_DATA_PREFIX
+
name
,
name
)
def
is_on_
gpu
(
self
,
name
):
def
is_on_
acc
(
self
,
name
):
return
(
name
,
self
.
GPU_DATA_PREFIX
+
name
)
in
self
.
_cpu_gpu_pairs
def
all_to_cpu
(
self
):
...
...
@@ -275,21 +276,15 @@ class ParallelDataHandling(DataHandling):
for
name
in
self
.
_custom_data_transfer_functions
.
keys
():
self
.
to_cpu
(
name
)
def
all_to_
gpu
(
self
):
def
all_to_
acc
(
self
):
for
cpu_name
,
gpu_name
in
self
.
_cpu_gpu_pairs
:
wlb
.
cuda
.
copyFieldToGpu
(
self
.
blocks
,
gpu_name
,
cpu_name
)
for
name
in
self
.
_custom_data_transfer_functions
.
keys
():
self
.
to_
gpu
(
name
)
self
.
to_
acc
(
name
)
def
synchronization_function_cpu
(
self
,
names
,
stencil
=
None
,
buffered
=
True
,
stencil_restricted
=
False
,
**
_
):
return
self
.
synchronization_function
(
names
,
stencil
,
'cpu'
,
buffered
,
stencil_restricted
)
def
synchronization_function_gpu
(
self
,
names
,
stencil
=
None
,
buffered
=
True
,
stencil_restricted
=
False
,
**
_
):
return
self
.
synchronization_function
(
names
,
stencil
,
'gpu'
,
buffered
,
stencil_restricted
)
def
synchronization_function
(
self
,
names
,
stencil
=
None
,
target
=
None
,
buffered
=
True
,
stencil_restricted
=
False
):
if
target
is
None
:
target
=
self
.
default_target
def
synchronization_function
(
self
,
names
,
stencil
=
None
,
acc
=
None
,
buffered
=
True
,
stencil_restricted
=
False
):
if
acc
is
None
:
target
=
self
.
target
if
stencil
is
None
:
stencil
=
'D3Q27'
if
self
.
dim
==
3
else
'D2Q9'
...
...
@@ -298,12 +293,12 @@ class ParallelDataHandling(DataHandling):
names
=
[
names
]
create_scheme
=
wlb
.
createUniformBufferedScheme
if
buffered
else
wlb
.
createUniformDirectScheme
if
target
==
'cpu'
:
if
not
acc
:
create_packing
=
wlb
.
field
.
createPackInfo
if
buffered
else
wlb
.
field
.
createMPIDatatypeInfo
if
not
buffered
and
stencil_restricted
:
create_packing
=
wlb
.
field
.
createStencilRestrictedPackInfo
else
:
assert
target
==
'
gpu
'
assert
self
.
target
==
'
cuda
'
create_packing
=
wlb
.
cuda
.
createPackInfo
if
buffered
else
wlb
.
cuda
.
createMPIDatatypeInfo
names
=
[
self
.
GPU_DATA_PREFIX
+
name
for
name
in
names
]
...
...
pystencils/datahandling/pycuda.py
View file @
33aa3bdd
...
...
@@ -25,7 +25,7 @@ class PyCudaArrayHandler:
else
:
return
gpuarray
.
empty
(
shape
,
dtype
)
def
to_
gpu
(
self
,
array
):
def
to_
acc
(
self
,
array
):
return
gpuarray
.
to_gpu
(
array
)
def
upload
(
self
,
gpuarray
,
numpy_array
):
...
...
pystencils/datahandling/pyopencl.py
View file @
33aa3bdd
...
...
@@ -10,7 +10,7 @@ import pystencils
class
PyOpenClArrayHandler
:
def
__init__
(
self
,
queue
):
def
__init__
(
self
,
queue
=
None
):
if
not
queue
:
from
pystencils.opencl.opencljit
import
get_global_cl_queue
queue
=
get_global_cl_queue
()
...
...
@@ -31,7 +31,7 @@ class PyOpenClArrayHandler:
else
:
return
gpuarray
.
empty
(
self
.
queue
,
shape
,
dtype
)
def
to_
gpu
(
self
,
array
):