Skip to content
Snippets Groups Projects
Commit bcd2d628 authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Allow fields to be aligned to cacheline size

parent a100b5b0
Branches
Tags
No related merge requests found
......@@ -10,20 +10,28 @@ def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, o
shape: size of the array
byte_alignment: alignment in bytes, for the start address of the array holds (a % byte_alignment) == 0
By default, use the maximum required by the CPU (or 512 bits if this cannot be detected).
When 'cacheline' is specified, the size of a cache line is used.
dtype: numpy data type
byte_offset: offset in bytes for position that should be aligned i.e. (a+byte_offset) % byte_alignment == 0
typically used to align first inner cell instead of ghost layer
order: storage linearization order
align_inner_coordinate: if True, the start of the innermost coordinate lines are aligned as well
"""
if byte_alignment is True:
from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets,
if byte_alignment is True or byte_alignment == 'cacheline':
from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
get_vector_instruction_set)
type_name = BasicType.numpy_name_to_c(np.dtype(dtype).name)
instruction_sets = get_supported_instruction_sets()
if instruction_sets is None:
byte_alignment = 64
elif byte_alignment == 'cacheline':
cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets]
if all([s is None for s in cacheline_sizes]):
byte_alignment = max([get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets])
else:
byte_alignment = max([s for s in cacheline_sizes if s is not None])
else:
byte_alignment = max([get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets])
......
......@@ -317,7 +317,7 @@ class CBackend:
if self._vector_instruction_set:
align = self._vector_instruction_set['bytes']
else:
align = node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes
align = node.symbol.dtype.base_type.numpy_dtype.itemsize
np_dtype = node.symbol.dtype.base_type.numpy_dtype
required_size = np_dtype.itemsize * node.size + align
......@@ -341,7 +341,7 @@ class CBackend:
if self._vector_instruction_set:
align = self._vector_instruction_set['bytes']
else:
align = node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes
align = node.symbol.dtype.base_type.numpy_dtype.itemsize
code = "#if defined(_MSC_VER)\n"
code += "_aligned_free(%s - %d);\n" % (self.sympy_printer.doprint(node.symbol.name), node.offset(align))
......
......@@ -15,6 +15,7 @@ def get_vector_instruction_set(data_type='double', instruction_set='avx'):
_cache = None
_cachelinesize = None
def get_supported_instruction_sets():
......@@ -56,3 +57,27 @@ def get_supported_instruction_sets():
if flags.issuperset(required_neon_flags):
result.append("neon")
return result
def get_cacheline_size(instruction_set):
"""Get the size (in bytes) of a cache block that can be zeroed without memory access.
Usually, this is identical to the cache line size."""
global _cachelinesize
instruction_sets = get_vector_instruction_set('double', instruction_set)
if 'cachelineSize' not in instruction_sets:
return None
if _cachelinesize is not None:
return _cachelinesize
import pystencils as ps
import numpy as np
arr = np.zeros((1, 1), dtype=np.float32)
f = ps.Field.create_from_numpy_array('f', arr, index_dimensions=0)
ass = [ps.astnodes.CachelineSize(), ps.Assignment(f.center, ps.astnodes.CachelineSize.symbol)]
ast = ps.create_kernel(ass, cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(**{f.name: arr, ps.astnodes.CachelineSize.symbol.name: 0})
_cachelinesize = int(arr[0, 0])
return _cachelinesize
......@@ -38,12 +38,8 @@ def test_aligned_and_nt_stores(openmp=False):
# create a datahandling object
dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu')
if openmp:
alignment = 128 if instruction_set == 'vsx' else 64 if instruction_set == 'neon' else True
else:
alignment = True
# fields
alignment = 'cacheline' if openmp else True
g = dh.add_array("g", values_per_cell=1, alignment=alignment)
dh.fill("g", 1.0, ghost_layers=True)
f = dh.add_array("f", values_per_cell=1, alignment=alignment)
......
......@@ -4,7 +4,8 @@ import numpy as np
import sympy as sp
import pystencils as ps
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
get_vector_instruction_set)
from pystencils.data_types import cast_func, VectorType
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
......@@ -76,4 +77,16 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
with pytest.raises(ValueError):
dh.run_kernel(kernel)
else:
dh.run_kernel(kernel)
\ No newline at end of file
dh.run_kernel(kernel)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_cacheline_size(instruction_set):
cacheline_size = get_cacheline_size(instruction_set)
if cacheline_size is None:
pytest.skip()
instruction_set = get_vector_instruction_set('double', instruction_set)
vector_size = instruction_set['bytes']
assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment