diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index e1350e3e623a8f56805cf5dd579043a98cfa4371..8b0b13aa7a863f7dedddda7270aaebebb8cac1e2 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -8,7 +8,7 @@ import sympy as sp from sympy.core import S from sympy.logic.boolalg import BooleanFalse, BooleanTrue -from pystencils.astnodes import KernelFunction, Node +from pystencils.astnodes import KernelFunction, LoopOverCoordinate, Node from pystencils.cpu.vectorization import vec_all, vec_any, CachelineSize from pystencils.data_types import ( PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, @@ -293,7 +293,14 @@ class CBackend: pre_code = '' if nontemporal and 'cachelineZero' in self._vector_instruction_set: - pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \ + first_cond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0" + offset = sp.Add(*[sp.Symbol(LoopOverCoordinate.get_loop_counter_name(i)) + * node.lhs.args[0].field.spatial_strides[i] for i in + range(len(node.lhs.args[0].field.spatial_strides))]) + size = sp.Mul(*node.lhs.args[0].field.spatial_shape) + element_size = 8 if data_type.base_type.base_name == 'double' else 4 + size_cond = f"({offset} + {CachelineSize.symbol/element_size}) < {size}" + pre_code = f"if ({first_cond} && {size_cond}) " + "{\n\t" + \ self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n' code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs), diff --git a/pytest.ini b/pytest.ini index 500485359e9b50696d429cdd7e879e2661d5c29e..039d41b593e3ccf0a57deecdf44f7aeaf590d46a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -43,7 +43,7 @@ exclude_lines = if __name__ == .__main__.: skip_covered = True -fail_under = 88 +fail_under = 87 [html] directory = coverage_report