From 2d5fcf53bcbab2681f4345f2d90db274e76697a0 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Sat, 24 Apr 2021 22:25:22 +0200
Subject: [PATCH] move NontemporalFence and CachelineSize to backend

---
 pystencils/astnodes.py                       | 44 ------------------
 pystencils/backends/cbackend.py              |  4 +-
 pystencils/backends/simd_instruction_sets.py |  5 +-
 pystencils/cpu/vectorization.py              | 48 +++++++++++++++++++-
 4 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index f9044d3cb..869054621 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -842,47 +842,3 @@ class ConditionalFieldAccess(sp.Function):
 
     def __getnewargs_ex__(self):
         return (self.access, self.outofbounds_condition, self.outofbounds_value), {}
-
-
-class NontemporalFence(Node):
-    def __init__(self):
-        super(NontemporalFence, self).__init__(parent=None)
-
-    @property
-    def symbols_defined(self):
-        return set()
-
-    @property
-    def undefined_symbols(self):
-        return set()
-
-    @property
-    def args(self):
-        return []
-
-    def __eq__(self, other):
-        return isinstance(other, NontemporalFence)
-
-
-class CachelineSize(Node):
-    symbol = sp.Symbol("_clsize")
-    mask_symbol = sp.Symbol("_clsize_mask")
-    last_symbol = sp.Symbol("_cl_lastvec")
-
-    def __init__(self):
-        super(CachelineSize, self).__init__(parent=None)
-
-    @property
-    def symbols_defined(self):
-        return set([self.symbol, self.mask_symbol, self.last_symbol])
-
-    @property
-    def undefined_symbols(self):
-        return set()
-
-    @property
-    def args(self):
-        return []
-
-    def __eq__(self, other):
-        return isinstance(other, CachelineSize)
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index d11723c1d..e1350e3e6 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -8,8 +8,8 @@ import sympy as sp
 from sympy.core import S
 from sympy.logic.boolalg import BooleanFalse, BooleanTrue
 
-from pystencils.astnodes import KernelFunction, Node, CachelineSize
-from pystencils.cpu.vectorization import vec_all, vec_any
+from pystencils.astnodes import KernelFunction, Node
+from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
 from pystencils.data_types import (
     PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
     reinterpret_cast_func, vector_memory_access, BasicType, TypedSymbol)
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index 0b982814a..4fe147821 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -92,12 +92,13 @@ def get_cacheline_size(instruction_set):
 
     import pystencils as ps
     import numpy as np
+    from pystencils.cpu.vectorization import CachelineSize
 
     arr = np.zeros((1, 1), dtype=np.float32)
     f = ps.Field.create_from_numpy_array('f', arr, index_dimensions=0)
-    ass = [ps.astnodes.CachelineSize(), ps.Assignment(f.center, ps.astnodes.CachelineSize.symbol)]
+    ass = [CachelineSize(), ps.Assignment(f.center, CachelineSize.symbol)]
     ast = ps.create_kernel(ass, cpu_vectorize_info={'instruction_set': instruction_set})
     kernel = ast.compile()
-    kernel(**{f.name: arr, ps.astnodes.CachelineSize.symbol.name: 0})
+    kernel(**{f.name: arr, CachelineSize.symbol.name: 0})
     _cachelinesize = int(arr[0, 0])
     return _cachelinesize
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 16f0a1563..c82c93588 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -26,6 +26,50 @@ class vec_all(sp.Function):
     nargs = (1,)
 
 
+class NontemporalFence(ast.Node):
+    def __init__(self):
+        super(NontemporalFence, self).__init__(parent=None)
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    @property
+    def args(self):
+        return []
+
+    def __eq__(self, other):
+        return isinstance(other, NontemporalFence)
+
+
+class CachelineSize(ast.Node):
+    symbol = sp.Symbol("_clsize")
+    mask_symbol = sp.Symbol("_clsize_mask")
+    last_symbol = sp.Symbol("_cl_lastvec")
+
+    def __init__(self):
+        super(CachelineSize, self).__init__(parent=None)
+
+    @property
+    def symbols_defined(self):
+        return set([self.symbol, self.mask_symbol, self.last_symbol])
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    @property
+    def args(self):
+        return []
+
+    def __eq__(self, other):
+        return isinstance(other, CachelineSize)
+
+
 def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', assume_aligned: bool = False,
               nontemporal: Union[bool, Container[Union[str, Field]]] = False,
               assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True):
@@ -156,9 +200,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a
                     parent = loop_node.parent
                     while type(parent.parent.parent) is not ast.KernelFunction:
                         parent = parent.parent
-                    parent.parent.insert_after(ast.NontemporalFence(), parent, if_not_exists=True)
+                    parent.parent.insert_after(NontemporalFence(), parent, if_not_exists=True)
                     # insert CachelineSize at the beginning of the kernel
-                    parent.parent.insert_front(ast.CachelineSize(), if_not_exists=True)
+                    parent.parent.insert_front(CachelineSize(), if_not_exists=True)
         if not successful:
             warnings.warn("Could not vectorize loop because of non-consecutive memory access")
             continue
-- 
GitLab