Commit 2d5fcf53 authored by Michael Kuron
Browse files

move NontemporalFence and CachelineSize to backend

parent 269c0bfa
......@@ -842,47 +842,3 @@ class ConditionalFieldAccess(sp.Function):
def __getnewargs_ex__(self):
    """Return the ``(args, kwargs)`` pair used to re-create this object.

    This is the pickle-protocol hook: the positional arguments are the
    access, the out-of-bounds condition and the out-of-bounds value, in that
    order; no keyword arguments are supplied.
    """
    positional = (
        self.access,
        self.outofbounds_condition,
        self.outofbounds_value,
    )
    keyword = {}
    return positional, keyword
class NontemporalFence(Node):
    """Argument-less AST node that neither defines nor requires any symbols.

    All instances compare equal to each other, so containers that test for an
    existing node by equality treat any two fences as the same node.
    NOTE(review): presumably emitted by the backend as a store fence after
    non-temporal writes - confirm against the code printer.
    """

    def __init__(self):
        # The node is created detached; whoever inserts it into a tree is
        # expected to attach it.
        super().__init__(parent=None)

    @property
    def symbols_defined(self):
        """Symbols introduced by this node: none."""
        return set()

    @property
    def undefined_symbols(self):
        """Symbols this node needs from its surroundings: none."""
        return set()

    @property
    def args(self):
        """Child nodes / arguments: none."""
        return []

    def __eq__(self, other):
        """Every ``NontemporalFence`` is equal to every other one."""
        return isinstance(other, NontemporalFence)

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable. Equal objects must hash equally, and all fences are
        # equal, so a single constant hash is the consistent choice.
        return hash(NontemporalFence)
class CachelineSize(Node):
    """Argument-less AST node that defines the three cache-line symbols.

    All instances compare equal to each other, so containers that test for an
    existing node by equality treat any two of them as the same node.
    """

    # Symbols made available to the rest of the kernel by this node.
    symbol = sp.Symbol("_clsize")
    mask_symbol = sp.Symbol("_clsize_mask")
    last_symbol = sp.Symbol("_cl_lastvec")

    def __init__(self):
        # The node is created detached; whoever inserts it into a tree is
        # expected to attach it.
        super().__init__(parent=None)

    @property
    def symbols_defined(self):
        """The set ``{symbol, mask_symbol, last_symbol}``."""
        return {self.symbol, self.mask_symbol, self.last_symbol}

    @property
    def undefined_symbols(self):
        """Symbols this node needs from its surroundings: none."""
        return set()

    @property
    def args(self):
        """Child nodes / arguments: none."""
        return []

    def __eq__(self, other):
        """Every ``CachelineSize`` is equal to every other one."""
        return isinstance(other, CachelineSize)

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable. Equal objects must hash equally, and all instances are
        # equal, so a single constant hash is the consistent choice.
        return hash(CachelineSize)
......@@ -8,8 +8,8 @@ import sympy as sp
from sympy.core import S
from sympy.logic.boolalg import BooleanFalse, BooleanTrue
from pystencils.astnodes import KernelFunction, Node, CachelineSize
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.astnodes import KernelFunction, Node
from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize
from pystencils.data_types import (
PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
reinterpret_cast_func, vector_memory_access, BasicType, TypedSymbol)
......
......@@ -92,12 +92,13 @@ def get_cacheline_size(instruction_set):
import pystencils as ps
import numpy as np
from pystencils.cpu.vectorization import CachelineSize
arr = np.zeros((1, 1), dtype=np.float32)
f = ps.Field.create_from_numpy_array('f', arr, index_dimensions=0)
ass = [ps.astnodes.CachelineSize(), ps.Assignment(f.center, ps.astnodes.CachelineSize.symbol)]
ass = [CachelineSize(), ps.Assignment(f.center, CachelineSize.symbol)]
ast = ps.create_kernel(ass, cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(**{f.name: arr, ps.astnodes.CachelineSize.symbol.name: 0})
kernel(**{f.name: arr, CachelineSize.symbol.name: 0})
_cachelinesize = int(arr[0, 0])
return _cachelinesize
......@@ -26,6 +26,50 @@ class vec_all(sp.Function):
nargs = (1,)
class NontemporalFence(ast.Node):
    """Argument-less AST node that neither defines nor requires any symbols.

    All instances compare equal to each other, so containers that test for an
    existing node by equality treat any two fences as the same node.
    NOTE(review): presumably emitted by the backend as a store fence after
    non-temporal writes - confirm against the code printer.
    """

    def __init__(self):
        # The node is created detached; whoever inserts it into a tree is
        # expected to attach it.
        super().__init__(parent=None)

    @property
    def symbols_defined(self):
        """Symbols introduced by this node: none."""
        return set()

    @property
    def undefined_symbols(self):
        """Symbols this node needs from its surroundings: none."""
        return set()

    @property
    def args(self):
        """Child nodes / arguments: none."""
        return []

    def __eq__(self, other):
        """Every ``NontemporalFence`` is equal to every other one."""
        return isinstance(other, NontemporalFence)

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable. Equal objects must hash equally, and all fences are
        # equal, so a single constant hash is the consistent choice.
        return hash(NontemporalFence)
class CachelineSize(ast.Node):
    """Argument-less AST node that defines the three cache-line symbols.

    All instances compare equal to each other, so containers that test for an
    existing node by equality treat any two of them as the same node.
    """

    # Symbols made available to the rest of the kernel by this node.
    symbol = sp.Symbol("_clsize")
    mask_symbol = sp.Symbol("_clsize_mask")
    last_symbol = sp.Symbol("_cl_lastvec")

    def __init__(self):
        # The node is created detached; whoever inserts it into a tree is
        # expected to attach it.
        super().__init__(parent=None)

    @property
    def symbols_defined(self):
        """The set ``{symbol, mask_symbol, last_symbol}``."""
        return {self.symbol, self.mask_symbol, self.last_symbol}

    @property
    def undefined_symbols(self):
        """Symbols this node needs from its surroundings: none."""
        return set()

    @property
    def args(self):
        """Child nodes / arguments: none."""
        return []

    def __eq__(self, other):
        """Every ``CachelineSize`` is equal to every other one."""
        return isinstance(other, CachelineSize)

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable. Equal objects must hash equally, and all instances are
        # equal, so a single constant hash is the consistent choice.
        return hash(CachelineSize)
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True):
......@@ -156,9 +200,9 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a
parent = loop_node.parent
while type(parent.parent.parent) is not ast.KernelFunction:
parent = parent.parent
parent.parent.insert_after(ast.NontemporalFence(), parent, if_not_exists=True)
parent.parent.insert_after(NontemporalFence(), parent, if_not_exists=True)
# insert CachelineSize at the beginning of the kernel
parent.parent.insert_front(ast.CachelineSize(), if_not_exists=True)
parent.parent.insert_front(CachelineSize(), if_not_exists=True)
if not successful:
warnings.warn("Could not vectorize loop because of non-consecutive memory access")
continue
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment