Skip to content
Snippets Groups Projects
Commit 152fc087 authored by Markus Holzer, committed by Michael Kuron
Browse files

Fix field size

parent 30641109
No related merge requests found
...@@ -59,6 +59,7 @@ from appdirs import user_cache_dir, user_config_dir ...@@ -59,6 +59,7 @@ from appdirs import user_cache_dir, user_config_dir
from pystencils import FieldType from pystencils import FieldType
from pystencils.backends.cbackend import generate_c, get_headers from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.data_types import cast_func, VectorType
from pystencils.include import get_pystencils_include_path from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper from pystencils.kernel_wrapper import KernelWrapper
from pystencils.utils import atomic_file_write, file_handle_for_atomic_write, recursive_dict_update from pystencils.utils import atomic_file_write, file_handle_for_atomic_write, recursive_dict_update
...@@ -266,7 +267,6 @@ type_mapping = { ...@@ -266,7 +267,6 @@ type_mapping = {
np.complex128: (('PyComplex_RealAsDouble', 'PyComplex_ImagAsDouble'), 'ComplexDouble'), np.complex128: (('PyComplex_RealAsDouble', 'PyComplex_ImagAsDouble'), 'ComplexDouble'),
} }
template_extract_scalar = """ template_extract_scalar = """
PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}"); PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}");
if( obj_{name} == NULL) {{ PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }}; if( obj_{name} == NULL) {{ PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }};
...@@ -357,7 +357,7 @@ def equal_size_check(fields): ...@@ -357,7 +357,7 @@ def equal_size_check(fields):
return template_size_check.format(cond=cond) return template_size_check.format(cond=cond)
def create_function_boilerplate_code(parameter_info, name, insert_checks=True): def create_function_boilerplate_code(parameter_info, name, ast_node, insert_checks=True):
pre_call_code = "" pre_call_code = ""
parameters = [] parameters = []
post_call_code = "" post_call_code = ""
...@@ -375,6 +375,25 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True): ...@@ -375,6 +375,25 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True):
np_dtype = field.dtype.numpy_dtype np_dtype = field.dtype.numpy_dtype
item_size = np_dtype.itemsize item_size = np_dtype.itemsize
aligned = False
if ast_node.assignments:
aligned = any([a.lhs.args[2] for a in ast_node.assignments
if hasattr(a, 'lhs') and isinstance(a.lhs, cast_func)
and hasattr(a.lhs, 'dtype') and isinstance(a.lhs.dtype, VectorType)])
if ast_node.instruction_set and aligned:
byte_width = ast_node.instruction_set['width'] * item_size
offset = max(max(ast_node.ghost_layers)) * item_size
offset_cond = f"(((uintptr_t) buffer_{field.name}.buf) + {offset}) % {byte_width} == 0"
message = str(offset) + ". This is probably due to a different number of ghost_layers chosen for " \
"the arrays and the kernel creation. If the number of ghost layers for " \
"the kernel creation is not specified it will choose a suitable value " \
"automatically. This value might not " \
"be compatible with the allocated arrays."
pre_call_code += template_check_array.format(cond=offset_cond, what="offset", name=field.name,
expected=message)
if (np_dtype.isbuiltin and FieldType.is_generic(field) if (np_dtype.isbuiltin and FieldType.is_generic(field)
and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)): and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)):
dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name, dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name,
...@@ -418,7 +437,7 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True): ...@@ -418,7 +437,7 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True):
extract_function_imag=extract_function[1], extract_function_imag=extract_function[1],
target_type=target_type, target_type=target_type,
real_type="float" if target_type == "ComplexFloat" real_type="float" if target_type == "ComplexFloat"
else "double", else "double",
name=param.symbol.name) name=param.symbol.name)
else: else:
pre_call_code += template_extract_scalar.format(extract_function=extract_function, pre_call_code += template_extract_scalar.format(extract_function=extract_function,
...@@ -481,12 +500,16 @@ class ExtensionModuleCode: ...@@ -481,12 +500,16 @@ class ExtensionModuleCode:
self._ast_nodes = [] self._ast_nodes = []
self._function_names = [] self._function_names = []
self._custom_backend = custom_backend self._custom_backend = custom_backend
self._code_string = str()
self._code_hash = None
def add_function(self, ast, name=None): def add_function(self, ast, name=None):
self._ast_nodes.append(ast) self._ast_nodes.append(ast)
self._function_names.append(name if name is not None else ast.function_name) self._function_names.append(name if name is not None else ast.function_name)
def write_to_file(self, restrict_qualifier, function_prefix, file): def create_code_string(self, restrict_qualifier, function_prefix):
self._code_string = str()
headers = {'<math.h>', '<stdint.h>'} headers = {'<math.h>', '<stdint.h>'}
for ast in self._ast_nodes: for ast in self._ast_nodes:
headers.update(get_headers(ast)) headers.update(get_headers(ast))
...@@ -495,19 +518,29 @@ class ExtensionModuleCode: ...@@ -495,19 +518,29 @@ class ExtensionModuleCode:
header_list.insert(0, '"Python.h"') header_list.insert(0, '"Python.h"')
includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
print(includes, file=file) self._code_string += includes
print("\n", file=file) self._code_string += "\n"
print(f"#define RESTRICT {restrict_qualifier}", file=file) self._code_string += f"#define RESTRICT {restrict_qualifier} \n"
print(f"#define FUNC_PREFIX {function_prefix}", file=file) self._code_string += f"#define FUNC_PREFIX {function_prefix}"
print("\n", file=file) self._code_string += "\n"
for ast, name in zip(self._ast_nodes, self._function_names): for ast, name in zip(self._ast_nodes, self._function_names):
old_name = ast.function_name old_name = ast.function_name
ast.function_name = "kernel_" + name ast.function_name = "kernel_" + name
print(generate_c(ast, custom_backend=self._custom_backend), file=file) self._code_string += generate_c(ast, custom_backend=self._custom_backend)
print(create_function_boilerplate_code(ast.get_parameters(), name), file=file) self._code_string += create_function_boilerplate_code(ast.get_parameters(), name, ast)
ast.function_name = old_name ast.function_name = old_name
print(create_module_boilerplate_code(self.module_name, self._function_names), file=file)
self._code_hash = "mod_" + hashlib.sha256(self._code_string.encode()).hexdigest()
self._code_string += create_module_boilerplate_code(self._code_hash, self._function_names)
def get_hash_of_code(self):
    """Return the hash stored in ``self._code_hash``.

    The code string must already have been generated; an empty
    ``self._code_string`` is treated as "not generated yet".

    Raises:
        AssertionError: if ``self._code_string`` is empty.
    """
    # Raise explicitly instead of using ``assert`` so the precondition is
    # still enforced when Python runs with ``-O`` (which strips asserts).
    if not self._code_string:
        raise AssertionError("The code must be generated first")
    return self._code_hash
def write_to_file(self, file):
    """Print the previously generated code string to ``file``.

    Args:
        file: writable text stream; it is handed to ``print(..., file=file)``,
            so a trailing newline is appended after the code.

    Raises:
        AssertionError: if ``self._code_string`` is empty.
    """
    # Raise explicitly instead of using ``assert`` so an empty module is never
    # written silently when Python runs with ``-O`` (which strips asserts).
    if not self._code_string:
        raise AssertionError("The code must be generated first")
    print(self._code_string, file=file)
def compile_module(code, code_hash, base_dir): def compile_module(code, code_hash, base_dir):
...@@ -515,12 +548,10 @@ def compile_module(code, code_hash, base_dir): ...@@ -515,12 +548,10 @@ def compile_module(code, code_hash, base_dir):
extra_flags = ['-I' + get_paths()['include'], '-I' + get_pystencils_include_path()] extra_flags = ['-I' + get_paths()['include'], '-I' + get_pystencils_include_path()]
if compiler_config['os'].lower() == 'windows': if compiler_config['os'].lower() == 'windows':
function_prefix = '__declspec(dllexport)'
lib_suffix = '.pyd' lib_suffix = '.pyd'
object_suffix = '.obj' object_suffix = '.obj'
windows = True windows = True
else: else:
function_prefix = ''
lib_suffix = '.so' lib_suffix = '.so'
object_suffix = '.o' object_suffix = '.o'
windows = False windows = False
...@@ -531,7 +562,7 @@ def compile_module(code, code_hash, base_dir): ...@@ -531,7 +562,7 @@ def compile_module(code, code_hash, base_dir):
if not os.path.exists(object_file): if not os.path.exists(object_file):
with file_handle_for_atomic_write(src_file) as f: with file_handle_for_atomic_write(src_file) as f:
code.write_to_file(compiler_config['restrict_qualifier'], function_prefix, f) code.write_to_file(f)
if windows: if windows:
compile_cmd = ['cl.exe', '/c', '/EHsc'] + compiler_config['flags'].split() compile_cmd = ['cl.exe', '/c', '/EHsc'] + compiler_config['flags'].split()
...@@ -564,11 +595,16 @@ def compile_module(code, code_hash, base_dir): ...@@ -564,11 +595,16 @@ def compile_module(code, code_hash, base_dir):
def compile_and_load(ast, custom_backend=None): def compile_and_load(ast, custom_backend=None):
cache_config = get_cache_config() cache_config = get_cache_config()
code_hash_str = "mod_" + hashlib.sha256(generate_c(ast, dialect='c',
custom_backend=custom_backend).encode()).hexdigest() compiler_config = get_compiler_config()
code = ExtensionModuleCode(module_name=code_hash_str, custom_backend=custom_backend) function_prefix = '__declspec(dllexport)' if compiler_config['os'].lower() == 'windows' else ''
code = ExtensionModuleCode(custom_backend=custom_backend)
code.add_function(ast, ast.function_name) code.add_function(ast, ast.function_name)
code.create_code_string(compiler_config['restrict_qualifier'], function_prefix)
code_hash_str = code.get_hash_of_code()
if cache_config['object_cache'] is False: if cache_config['object_cache'] is False:
with TemporaryDirectory() as base_dir: with TemporaryDirectory() as base_dir:
lib_file = compile_module(code, code_hash_str, base_dir) lib_file = compile_module(code, code_hash_str, base_dir)
......
...@@ -13,7 +13,7 @@ from pystencils.cpu.vectorization import vec_all, vec_any ...@@ -13,7 +13,7 @@ from pystencils.cpu.vectorization import vec_all, vec_any
def test_vec_any(): def test_vec_any():
data_arr = np.zeros((15, 15)) data_arr = np.zeros((15, 15))
data_arr[3:9, 2:7] = 1.0 data_arr[3:9, 1] = 1.0
data = ps.fields("data: double[2D]", data=data_arr) data = ps.fields("data: double[2D]", data=data_arr)
c = [ c = [
...@@ -22,11 +22,15 @@ def test_vec_any(): ...@@ -22,11 +22,15 @@ def test_vec_any():
ps.Assignment(data.center(), 2.0) ps.Assignment(data.center(), 2.0)
])) ]))
] ]
instruction_set = get_supported_instruction_sets()[-1]
ast = ps.create_kernel(c, target='cpu', ast = ps.create_kernel(c, target='cpu',
cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1]}) cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile() kernel = ast.compile()
kernel(data=data_arr) kernel(data=data_arr)
np.testing.assert_equal(data_arr[3:9, 0:8], 2.0)
width = ast.instruction_set['width']
np.testing.assert_equal(data_arr[3:9, 0:width], 2.0)
@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set') @pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
......
...@@ -4,7 +4,8 @@ import numpy as np ...@@ -4,7 +4,8 @@ import numpy as np
import sympy as sp import sympy as sp
import pystencils as ps import pystencils as ps
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.data_types import cast_func, VectorType
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else [] supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
...@@ -49,3 +50,30 @@ def test_vectorized_abs(instruction_set, dtype): ...@@ -49,3 +50,30 @@ def test_vectorized_abs(instruction_set, dtype):
dst = np.zeros_like(arr) dst = np.zeros_like(arr)
func(g=dst, f=arr) func(g=dst, f=arr)
np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3) np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
@pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)])
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
    """Run an aligned, vectorized copy kernel on aligned arrays.

    The arrays are created with ``gl_field`` ghost layers and the kernel with
    ``gl_kernel`` ghost layers. When the two differ, running the kernel is
    expected to raise ``ValueError``; when they match, it must simply run.
    """
    is_double = dtype == 'double'
    bytes_per_item = 8 if is_double else 4
    # The vector width is looked up with the string dtype name, before it is
    # translated into a numpy type below.
    vector_width = get_vector_instruction_set(dtype, instruction_set)['width']
    byte_alignment = vector_width * bytes_per_item
    np_type = np.float64 if is_double else np.float32

    dh = ps.create_data_handling((128, 128), periodicity=(True, True), default_target='cpu')

    src = dh.add_array("src", values_per_cell=1, dtype=np_type,
                       ghost_layers=gl_field, alignment=byte_alignment)
    dh.fill(src.name, 1.0, ghost_layers=True)
    dst = dh.add_array("dst", values_per_cell=1, dtype=np_type,
                       ghost_layers=gl_field, alignment=byte_alignment)
    dh.fill(dst.name, 1.0, ghost_layers=True)

    copy_rule = ps.Assignment(dst[0, 0], src[0, 0])
    vectorize_options = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    ast = ps.create_kernel(copy_rule, target=dh.default_target,
                           cpu_vectorize_info=vectorize_options, ghost_layers=gl_kernel)
    kernel = ast.compile()

    if gl_kernel == gl_field:
        # Matching ghost layers: the kernel must run without complaint.
        dh.run_kernel(kernel)
        return

    # Mismatching ghost layers: the call is expected to be rejected.
    with pytest.raises(ValueError):
        dh.run_kernel(kernel)
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment