import numpy as np def aligned_empty(shape, byte_alignment=32, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True): """ Creates an aligned empty numpy array Args: shape: size of the array byte_alignment: alignment in bytes, for the start address of the array holds (a % byte_alignment) == 0 dtype: numpy data type byte_offset: offset in bytes for position that should be aligned i.e. (a+byte_offset) % byte_alignment == 0 typically used to align first inner cell instead of ghost layer order: storage linearization order align_inner_coordinate: if True, the start of the innermost coordinate lines are aligned as well """ if (not align_inner_coordinate) or (not hasattr(shape, '__len__')): size = np.prod(shape) d = np.dtype(dtype) # 2 * byte_alignment instead of 1 * byte_alignment to have slack in the end such that # vectorized loops can access vector_width elements further and don't require a tail loop tmp = np.empty(size * d.itemsize + 2 * byte_alignment, dtype=np.uint8) address = tmp.__array_interface__['data'][0] offset = (byte_alignment - (address + byte_offset) % byte_alignment) % byte_alignment return tmp[offset:offset + size * d.itemsize].view(dtype=d).reshape(shape, order=order) else: if order == 'C': dim0_size = shape[-1] dim0 = -1 dim1_size = np.prod(shape[:-1]) else: dim0_size = shape[0] dim0 = 0 dim1_size = np.prod(shape[1:]) d = np.dtype(dtype) assert byte_alignment >= d.itemsize and byte_alignment % d.itemsize == 0 padding = (byte_alignment - ((dim0_size * d.itemsize) % byte_alignment)) % byte_alignment size = dim1_size * padding + np.prod(shape) * d.itemsize tmp = aligned_empty(size, byte_alignment=byte_alignment, dtype=np.uint8, byte_offset=byte_offset) tmp = tmp.view(dtype=dtype) shape_in_bytes = [i for i in shape] shape_in_bytes[dim0] = dim0_size + padding // d.itemsize tmp = tmp.reshape(shape_in_bytes, order=order) if tmp.flags['C_CONTIGUOUS']: tmp = tmp[..., :shape[-1]] else: tmp = tmp[:shape[0], ...] return tmp def aligned_zeros(shape, byte_alignment=16, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) x = np.zeros((), arr.dtype) arr[...] = x return arr def aligned_ones(shape, byte_alignment=16, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) x = np.ones((), arr.dtype) arr[...] = x return arr