Tom Harke / pystencils / Commits

Commit a2983309, authored Jun 13, 2018 by Martin Bauer

    Further small-block optimizations

parent 331b7758

5 changed files
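The five diffs share one theme: when a simulation domain is decomposed into many small blocks, per-call overhead starts to dominate kernel runtime. `ParallelDataHandling` now prebuilds per-block argument dictionaries (`_rebuild_data_cache`) that `run_kernel` simply replays, `BoundaryHandling` routes its kernels through `DataHandling.run_kernel`, and the standalone `cpu/cpujit_module.py` is deleted (its counterpart `cpu/cpujit.py` is collapsed in this view, so presumably it absorbed that functionality). A camelCase attribute is also renamed to snake_case along the way.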
boundaries/boundaryhandling.py
```diff
@@ -193,7 +193,7 @@ class BoundaryHandling:
         else:
             ff_ghost_layers = self._data_handling.ghost_layers_of_field(self.flag_interface.flag_field_name)
         for b in self._data_handling.iterate(ghost_layers=ff_ghost_layers):
-            for b_obj, setter in b[self._index_array_name].boundary_objectToDataSetter.items():
+            for b_obj, setter in b[self._index_array_name].boundary_object_to_data_setter.items():
                 self._boundary_data_initialization(b_obj, setter, **kwargs)

     def __call__(self, **kwargs):
@@ -202,14 +202,9 @@ class BoundaryHandling:
         for b in self._data_handling.iterate(gpu=self._target == 'gpu'):
             for b_obj, idx_arr in b[self._index_array_name].boundary_object_to_index_list.items():
                 kwargs[self._field_name] = b[self._field_name]
                 kwargs['indexField'] = idx_arr
                 data_used_in_kernel = (p.field_name
                                        for p in self._boundary_object_to_boundary_info[b_obj].kernel.parameters
                                        if p.is_field_ptr_argument and p.field_name not in kwargs)
                 kwargs.update({name: b[name] for name in data_used_in_kernel})
-                self._boundary_object_to_boundary_info[b_obj].kernel(**kwargs)
+                kernel = self._boundary_object_to_boundary_info[b_obj].kernel
+                self._data_handling.run_kernel(kernel, **kwargs)

     def geometry_to_vtk(self, file_name='geometry', boundaries='all', ghost_layers=False):
         """
```
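This second hunk is the behavioral change in this file: `__call__` no longer invokes the compiled boundary kernel itself but hands it to `self._data_handling.run_kernel`, so the data-handling layer owns kernel dispatch (the serial handling can call the kernel directly; the parallel one, changed below, replays cached per-block arguments). A minimal sketch of the contract this relies on, with illustrative names only:

```python
# Hedged sketch of the run_kernel contract assumed by BoundaryHandling.__call__:
# the data handling owns block iteration and per-block field arguments,
# merging in the caller's kwargs. All names here are illustrative.
class DataHandlingSketch:
    def __init__(self, block_args):
        self._block_args = block_args      # list of per-block kwargs dicts

    def run_kernel(self, kernel, **kwargs):
        for args in self._block_args:      # prebuilt, cf. _rebuild_data_cache below
            kernel(**args, **kwargs)
```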
```diff
@@ -273,7 +268,7 @@ class BoundaryHandling:
                 boundary_data_setter = BoundaryDataSetter(idx_arr, b.offset, self.stencil, ff_ghost_layers, pdf_arr)
                 index_array_bd.boundary_object_to_index_list[b_info.boundary_object] = idx_arr
-                index_array_bd.boundary_objectToDataSetter[b_info.boundary_object] = boundary_data_setter
+                index_array_bd.boundary_object_to_data_setter[b_info.boundary_object] = boundary_data_setter
                 self._boundary_data_initialization(b_info.boundary_object, boundary_data_setter)

     def _boundary_data_initialization(self, boundary_obj, boundary_data_setter, **kwargs):
@@ -291,11 +286,11 @@ class BoundaryHandling:
     class IndexFieldBlockData:
         def __init__(self, *_1, **_2):
             self.boundary_object_to_index_list = {}
-            self.boundary_objectToDataSetter = {}
+            self.boundary_object_to_data_setter = {}

         def clear(self):
             self.boundary_object_to_index_list.clear()
-            self.boundary_objectToDataSetter.clear()
+            self.boundary_object_to_data_setter.clear()

         @staticmethod
         def to_cpu(gpu_version, cpu_version):
```
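The last two hunks complete the rename started in the first one: the camelCase `boundary_objectToDataSetter` becomes `boundary_object_to_data_setter`, matching the neighboring `boundary_object_to_index_list`.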
cpu/cpujit.py

This diff is collapsed.
cpu/cpujit_module.py (deleted, 100644 → 0)
```python
import os
import textwrap
import hashlib
import numpy as np
from sysconfig import get_paths

from pystencils import FieldType
from pystencils.cpu.cpujit import run_compile_step
from pystencils.data_types import get_base_type
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.utils import file_handle_for_atomic_write, atomic_file_write

type_mapping = {
    np.float32: ('PyFloat_AsDouble', 'float'),
    np.float64: ('PyFloat_AsDouble', 'double'),
    np.int16: ('PyLong_AsLong', 'int16_t'),
    np.int32: ('PyLong_AsLong', 'int32_t'),
    np.int64: ('PyLong_AsLong', 'int64_t'),
    np.uint16: ('PyLong_AsUnsignedLong', 'uint16_t'),
    np.uint32: ('PyLong_AsUnsignedLong', 'uint32_t'),
    np.uint64: ('PyLong_AsUnsignedLong', 'uint64_t'),
}

template_extract_scalar = """
PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}");
if( obj_{name} == NULL) {{ PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }};
{target_type} {name} = ({target_type}) {extract_function}( obj_{name} );
if( PyErr_Occurred() ) {{ return NULL; }}
"""

template_extract_array = """
PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}");
if( obj_{name} == NULL) {{ PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }};
Py_buffer buffer_{name};
int buffer_{name}_res = PyObject_GetBuffer(obj_{name}, &buffer_{name}, PyBUF_STRIDES | PyBUF_WRITABLE);
if (buffer_{name}_res == -1) {{ return NULL; }}
"""

template_release_buffer = """
PyBuffer_Release(&buffer_{name});
"""

template_function_boilerplate = """
static PyObject * {func_name}(PyObject * self, PyObject * args, PyObject * kwargs)
{{
    if( !kwargs || !PyDict_Check(kwargs) ) {{ PyErr_SetString(PyExc_TypeError, "No keyword arguments passed"); return NULL; }}
{pre_call_code}
    kernel_{func_name}({parameters});
{post_call_code}
    Py_RETURN_NONE;
}}
"""

template_check_array = """
if(!({cond})) {{
    PyErr_SetString(PyExc_ValueError, "Wrong {what} of array {name}. Expected {expected}");
    return NULL;
}}
"""

template_size_check = """
if(!({cond})) {{
    PyErr_SetString(PyExc_TypeError, "Arrays must have same shape"); return NULL;
}}"""

template_module_boilerplate = """
static PyMethodDef method_definitions[] = {{
    {method_definitions}
    {{NULL, NULL, 0, NULL}}
}};

static struct PyModuleDef module_definition = {{
    PyModuleDef_HEAD_INIT,
    "{module_name}",   /* name of module */
    NULL,              /* module documentation, may be NULL */
    -1,                /* size of per-interpreter state of the module,
                          or -1 if the module keeps state in global variables. */
    method_definitions
}};

PyMODINIT_FUNC
PyInit_{module_name}(void)
{{
    return PyModule_Create(&module_definition);
}}
"""
```
```python
def equal_size_check(fields):
    fields = list(fields)
    if len(fields) <= 1:
        return ""
    ref_field = fields[0]
    cond = ["({field.name}_shape[{i}] == {ref_field.name}_shape[{i}])".format(ref_field=ref_field,
                                                                              field=field_to_test, i=i)
            for field_to_test in fields[1:]
            for i in range(fields[0].spatial_dimensions)]
    cond = " && ".join(cond)
    return template_size_check.format(cond=cond)


def create_function_boilerplate_code(parameter_info, name, insert_checks=True):
    pre_call_code = ""
    parameters = []
    post_call_code = ""
    variable_sized_normal_fields = set()
    variable_sized_index_fields = set()

    for arg in parameter_info:
        if arg.is_field_argument:
            if arg.is_field_ptr_argument:
                pre_call_code += template_extract_array.format(name=arg.field_name)
                post_call_code += template_release_buffer.format(name=arg.field_name)
                parameters.append("({dtype} *)buffer_{name}.buf".format(dtype=str(arg.field.dtype),
                                                                        name=arg.field_name))

                shapes = ", ".join(["buffer_{name}.shape[{i}]".format(name=arg.field_name, i=i)
                                    for i in range(len(arg.field.strides))])
                pre_call_code += "Py_ssize_t {name}_shape[] = {{ {elements} }};\n".format(name=arg.field_name,
                                                                                          elements=shapes)

                item_size = get_base_type(arg.dtype).numpy_dtype.itemsize
                strides = ["buffer_{name}.strides[{i}] / {bytes}".format(i=i, name=arg.field_name,
                                                                         bytes=item_size)
                           for i in range(len(arg.field.strides))]
                strides = ", ".join(strides)
                pre_call_code += "Py_ssize_t {name}_strides[] = {{ {elements} }};\n".format(name=arg.field_name,
                                                                                            elements=strides)

                if insert_checks and arg.field.has_fixed_shape:
                    shape_cond = ["{name}_shape[{i}] == {s}".format(s=s, name=arg.field_name, i=i)
                                  for i, s in enumerate(arg.field.spatial_shape)]
                    shape_cond = " && ".join(shape_cond)
                    pre_call_code += template_check_array.format(cond=shape_cond, what="shape",
                                                                 name=arg.field.name,
                                                                 expected=str(arg.field.shape))

                    strides_cond = ["({name}_strides[{i}] == {s} || {name}_shape[{i}]<=1)".format(
                        s=s, i=i, name=arg.field_name)
                        for i, s in enumerate(arg.field.spatial_strides)]
                    strides_cond = " && ".join(strides_cond)
                    expected_strides_str = str([e * item_size for e in arg.field.strides])
                    pre_call_code += template_check_array.format(cond=strides_cond, what="strides",
                                                                 name=arg.field.name,
                                                                 expected=expected_strides_str)
                if insert_checks and not arg.field.has_fixed_shape:
                    if FieldType.is_generic(arg.field):
                        variable_sized_normal_fields.add(arg.field)
                    elif FieldType.is_indexed(arg.field):
                        variable_sized_index_fields.add(arg.field)
            elif arg.is_field_shape_argument:
                parameters.append("{name}_shape".format(name=arg.field_name))
            elif arg.is_field_stride_argument:
                parameters.append("{name}_strides".format(name=arg.field_name))
        else:
            extract_function, target_type = type_mapping[arg.dtype.numpy_dtype.type]
            pre_call_code += template_extract_scalar.format(extract_function=extract_function,
                                                            target_type=target_type, name=arg.name)
            parameters.append(arg.name)

    pre_call_code += equal_size_check(variable_sized_normal_fields)
    pre_call_code += equal_size_check(variable_sized_index_fields)

    pre_call_code = textwrap.indent(pre_call_code, '    ')
    post_call_code = textwrap.indent(post_call_code, '    ')
    return template_function_boilerplate.format(func_name=name, pre_call_code=pre_call_code,
                                                post_call_code=post_call_code,
                                                parameters=", ".join(parameters))


def create_module_boilerplate_code(module_name, names):
    method_definition = '{{"{name}", (PyCFunction){name}, METH_VARARGS | METH_KEYWORDS, ""}},'
    method_definitions = "\n".join([method_definition.format(name=name) for name in names])
    return template_module_boilerplate.format(module_name=module_name, method_definitions=method_definitions)
```
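`create_function_boilerplate_code` walks a kernel's parameter list and emits extraction code per argument kind: `Py_buffer` acquisition for field pointers, shape/stride arrays, scalar conversion via `type_mapping`, plus optional shape and stride checks for fixed-size fields. The module-level scaffolding can be previewed directly (the function names here are chosen for illustration):

```python
# Illustrative only: module scaffolding for two generated kernels. Prints a
# PyMethodDef table with one METH_VARARGS | METH_KEYWORDS entry per name,
# followed by the PyModuleDef / PyInit_mod_example boilerplate.
print(create_module_boilerplate_code('mod_example', ['jacobi', 'rhs_update']))
```

The remainder of the deleted file ties code generation, compilation, caching, and loading together: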
```python
def load_kernel_from_file(module_name, function_name, path):
    from importlib.util import spec_from_file_location, module_from_spec
    spec = spec_from_file_location(name=module_name, location=path)
    mod = module_from_spec(spec)
    spec.loader.exec_module(mod)
    return getattr(mod, function_name)


class ExtensionModuleCode:
    def __init__(self, module_name='generated'):
        self.module_name = module_name
        self._ast_nodes = []
        self._function_names = []

    def add_function(self, ast, name=None):
        self._ast_nodes.append(ast)
        self._function_names.append(name if name is not None else ast.function_name)

    def write_to_file(self, restrict_qualifier, function_prefix, file):
        headers = {'<math.h>', '<stdint.h>', '"Python.h"'}
        for ast in self._ast_nodes:
            headers.update(get_headers(ast))
        includes = "\n".join(["#include %s" % (include_file,) for include_file in headers])

        print(includes, file=file)
        print("\n", file=file)
        print("#define RESTRICT %s" % (restrict_qualifier,), file=file)
        print("#define FUNC_PREFIX %s" % (function_prefix,), file=file)
        print("\n", file=file)

        for ast, name in zip(self._ast_nodes, self._function_names):
            old_name = ast.function_name
            ast.function_name = "kernel_" + name
            print(generate_c(ast), file=file)
            print(create_function_boilerplate_code(ast.parameters, name), file=file)
            ast.function_name = old_name

        print(create_module_boilerplate_code(self.module_name, self._function_names), file=file)


class KernelWrapper:
    def __init__(self, kernel, parameters, ast_node):
        self.kernel = kernel
        self.parameters = parameters
        self.ast = ast_node

    def __call__(self, **kwargs):
        return self.kernel(**kwargs)


def compile_and_load(ast):
    from pystencils.cpu.cpujit import get_cache_config, get_compiler_config
    cache_config = get_cache_config()
    code_hash_str = "mod_" + hashlib.sha256(generate_c(ast).encode()).hexdigest()
    code = ExtensionModuleCode(module_name=code_hash_str)
    code.add_function(ast, ast.function_name)

    src_file = os.path.join(cache_config['object_cache'], code_hash_str + ".cpp")
    lib_file = os.path.join(cache_config['object_cache'], code_hash_str + ".so")
    if not os.path.exists(lib_file):
        compiler_config = get_compiler_config()
        extra_flags = ['-I' + get_paths()['include']]
        object_file = os.path.join(cache_config['object_cache'], code_hash_str + '.o')
        if not os.path.exists(object_file):
            with file_handle_for_atomic_write(src_file) as f:
                code.write_to_file(compiler_config['restrict_qualifier'], '', f)
            with atomic_file_write(object_file) as file_name:
                compile_cmd = [compiler_config['command'], '-c'] + compiler_config['flags'].split()
                compile_cmd += [*extra_flags, '-o', file_name, src_file]
                run_compile_step(compile_cmd)

        # Linking
        with atomic_file_write(lib_file) as file_name:
            run_compile_step([compiler_config['command'], '-shared', object_file, '-o', file_name]
                             + compiler_config['flags'].split())

    result = load_kernel_from_file(code_hash_str, ast.function_name, lib_file)
    return KernelWrapper(result, ast.parameters, ast)


def make_python_function(kernel_function_node, argument_dict=None):
    import functools
    result = compile_and_load(kernel_function_node)
    if argument_dict:
        result = functools.partial(result, **argument_dict)
    return result
```
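For context, a hedged sketch of how this deleted module's entry point was typically driven. The surrounding pystencils API of mid-2018 (import paths, `Field.create_from_numpy_array`, passing `sympy.Eq` assignments to `create_kernel`) is assumed here, not taken from the commit:

```python
# Hypothetical usage of the deleted module's entry point; the pystencils API
# names used here are assumptions, not part of this commit.
import numpy as np
import sympy as sp
from pystencils import Field, create_kernel

src_arr = np.zeros((16, 16))
dst_arr = np.zeros_like(src_arr)
f = Field.create_from_numpy_array('f', src_arr)
g = Field.create_from_numpy_array('g', dst_arr)

# four-point average stencil, written as a sympy equation
update = sp.Eq(g.center, (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
ast = create_kernel([update])

kernel = make_python_function(ast)  # compile, cache by code hash, load the .so
kernel(f=src_arr, g=dst_arr)        # arguments are keyword-only, extracted via Py_buffer
```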
datahandling/parallel_datahandling.py
```diff
@@ -135,6 +135,8 @@ class ParallelDataHandling(DataHandling):
         self._field_name_to_cpu_data_name[name] = name
         if gpu:
             self._field_name_to_gpu_data_name[name] = self.GPU_DATA_PREFIX + name
+        self._rebuild_data_cache()
         return self.fields[name]

     def has_data(self, name):
@@ -154,8 +156,15 @@ class ParallelDataHandling(DataHandling):
     def swap(self, name1, name2, gpu=False):
         if gpu:
+            for d in self._data_cache_gpu:
+                d[name1], d[name2] = d[name2], d[name1]
             name1 = self.GPU_DATA_PREFIX + name1
             name2 = self.GPU_DATA_PREFIX + name2
+        else:
+            for d in self._data_cache_cpu:
+                d[name1], d[name2] = d[name2], d[name1]
         for block in self.blocks:
             block[name1].swapDataPointers(block[name2])
```
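The cached per-block dictionaries map field names to concrete array views, so swapping the underlying block data must also swap the cached entries; otherwise a cached name would keep pointing at the pre-swap array. That is why both branches now update `_data_cache_gpu` / `_data_cache_cpu` before the block-level `swapDataPointers` runs.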
```diff
@@ -213,24 +222,31 @@ class ParallelDataHandling(DataHandling):
             arr = arr[:, :, 0]
         return arr

-    def run_kernel(self, kernel_function, *args, **kwargs):
-        if kernel_function.ast.backend == 'gpucuda':
-            name_map = self._field_name_to_gpu_data_name
-            to_array = wlb.cuda.toGpuArray
-        else:
-            name_map = self._field_name_to_cpu_data_name
-            to_array = wlb.field.toArray
-        data_used_in_kernel = [(name_map[p.field_name], self.fields[p.field_name])
-                               for p in kernel_function.parameters
-                               if p.is_field_ptr_argument and p.field_name not in kwargs]
-        for block in self.blocks:
-            field_args = {}
-            for data_name, f in data_used_in_kernel:
-                arr = to_array(block[data_name], withGhostLayers=[True, True, self.dim == 3])
-                arr = self._normalize_arr_shape(arr, f.index_dimensions)
-                field_args[f.name] = arr
-            field_args.update(kwargs)
-            kernel_function(*args, **field_args)
+    def _rebuild_data_cache(self):
+        self._data_cache_cpu = []
+        self._data_cache_gpu = []
+        elements = [(self._data_cache_cpu, wlb.field.toArray, self._field_name_to_cpu_data_name)]
+        if self._field_name_to_gpu_data_name:
+            elements.append((self._data_cache_gpu, wlb.cuda.toGpuArray, self._field_name_to_gpu_data_name))
+        for cache, to_array, name_to_data_name in elements:
+            for block in self.blocks:
+                field_args = {}
+                for field_name, data_name in name_to_data_name.items():
+                    field = self.fields[field_name]
+                    arr = to_array(block[data_name], withGhostLayers=[True, True, self.dim == 3])
+                    arr = self._normalize_arr_shape(arr, field.index_dimensions)
+                    field_args[field_name] = arr
+                cache.append(field_args)
+
+    def run_kernel(self, kernel_function, **kwargs):
+        if kernel_function.ast.backend == 'gpucuda':
+            for d in self._data_cache_gpu:
+                kernel_function(**d, **kwargs)
+        else:
+            for d in self._data_cache_cpu:
+                kernel_function(**d, **kwargs)

     def to_cpu(self, name):
         if name in self._custom_data_transfer_functions:
```
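This is the core optimization. The former `run_kernel` re-created every field view on each call: `toArray` wrapping, ghost-layer handling, and shape normalization, once per block per launch. After this commit that work runs once in `_rebuild_data_cache`, and a launch is a plain loop over prebuilt dicts. Note the signature change as well: positional `*args` are gone, so kernel arguments are keyword-only. A hedged sketch of the resulting call pattern (`dh`, `update_kernel`, and `time_steps` are placeholders, not names from the commit):

```python
# Hypothetical driver loop: with the cache, each run_kernel call only walks
# prebuilt per-block kwargs dicts instead of re-wrapping block data.
def time_loop(dh, update_kernel, time_steps):
    for _ in range(time_steps):
        dh.run_kernel(update_kernel)  # replays _data_cache_cpu / _data_cache_gpu
        dh.swap('src', 'dst')         # swaps block data and cached dicts together
```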
datahandling/serial_datahandling.py
```diff
@@ -196,10 +196,8 @@ class SerialDataHandling(DataHandling):
         return arr

     def swap(self, name1, name2, gpu=False):
-        if not gpu:
-            self.cpu_arrays[name1], self.cpu_arrays[name2] = self.cpu_arrays[name2], self.cpu_arrays[name1]
-        else:
-            self.gpu_arrays[name1], self.gpu_arrays[name2] = self.gpu_arrays[name2], self.gpu_arrays[name1]
+        arr = self.gpu_arrays if gpu else self.cpu_arrays
+        arr[name1], arr[name2] = arr[name2], arr[name1]

     def all_to_cpu(self):
         for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()) | self._custom_data_transfer_functions.keys():
```
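`SerialDataHandling.swap` gets the same cleanup in miniature: pick the CPU or GPU array dict once, then swap in place. Behavior is unchanged; the duplicated branch bodies are gone.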