diff --git a/CMakeLists.txt b/CMakeLists.txt
index bdaf0ea9ac8c7531815f8f27bad3943eec566191..f3ff04c9e922cf9fbc6035f3353a09ba85205a25 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -357,12 +357,29 @@ endif()
 # architecture optimization
 if( WALBERLA_OPTIMIZE_FOR_LOCALHOST )
    if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
-      add_flag ( CMAKE_CXX_FLAGS "-march=native" )
-      add_flag ( CMAKE_C_FLAGS   "-march=native" )
+      if( CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64" )
+        # no -march=native available on this compiler; skipping it is acceptable
+        # because there is currently only one such processor
+      else()
+        add_flag ( CMAKE_CXX_FLAGS "-march=native" )
+        add_flag ( CMAKE_C_FLAGS   "-march=native" )
+      endif()
+
       if( WALBERLA_CXX_COMPILER_IS_INTEL )
         add_flag ( CMAKE_CXX_FLAGS "-xhost" )
         add_flag ( CMAKE_C_FLAGS   "-xhost" )
       endif()
+
+      # Pin the SVE vector width to the kernel's default vector length.
+      # The proc file holds the length in BYTES followed by a newline, while
+      # -msve-vector-bits expects a bare number of BITS, so strip and convert.
+      if( EXISTS "/proc/sys/abi/sve_default_vector_length" )
+        file( READ "/proc/sys/abi/sve_default_vector_length" SVE_LENGTH_BYTES )
+        string( STRIP "${SVE_LENGTH_BYTES}" SVE_LENGTH_BYTES )
+        math( EXPR SVE_LENGTH "${SVE_LENGTH_BYTES} * 8" )
+        add_flag ( CMAKE_CXX_FLAGS "-msve-vector-bits=${SVE_LENGTH}" )
+        add_flag ( CMAKE_C_FLAGS   "-msve-vector-bits=${SVE_LENGTH}" )
+      endif()
    endif()
 endif()
 
diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py
index 44de5c3fb878cf1b34c7c311ce1cdc1da8daae8a..f1a84798f2af17c254febf5e4c327964db824d91 100644
--- a/python/pystencils_walberla/codegen.py
+++ b/python/pystencils_walberla/codegen.py
@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments,
     else:
         ast = create_staggered_kernel(assignments, **create_kernel_params)
     ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
+    ast.nontemporal = create_kernel_params['cpu_vectorize_info']['nontemporal']
+    ast.openmp = create_kernel_params['cpu_openmp']
 
     def to_name(f):
         return f.name if isinstance(f, Field) else f
diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py
index 7158f81685b5382f7516299af574fd0d374be47a..ad6d317878ed6ce650ad81c7223f6d744cba79a5 100644
--- a/python/pystencils_walberla/jinja_filters.py
+++ b/python/pystencils_walberla/jinja_filters.py
@@ -238,37 +238,50 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
         if param.is_field_pointer:
             field = param.fields[0]
             if field.field_type == FieldType.BUFFER:
-                kernel_call_lines.append("%s %s = %s;" % (param.symbol.dtype, param.symbol.name, param.field_name))
+                kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name};")
             else:
                 coordinates = get_start_coordinates(field)
-                actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, )
+                actual_gls = f"int_c({param.field_name}->nrOfGhostLayers())"
                 coord_set = set(coordinates)
                 coord_set = sorted(coord_set, key=lambda e: str(e))
                 for c in coord_set:
-                    kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" %
-                                             (c, actual_gls))
+                    kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({c}, -{actual_gls});")
                 while len(coordinates) < 4:
                     coordinates.append(0)
                 coordinates = tuple(coordinates)
-                kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" %
-                                         ((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates))
+                kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name}->dataAt"
+                                         f"({coordinates[0]}, {coordinates[1]}, {coordinates[2]}, {coordinates[3]});")
                 if ast.assumed_inner_stride_one and field.index_dimensions > 0:
-                    kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,))
+                    kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({param.field_name}->layout(), field::fzyx);")
+                if ast.instruction_set and ast.assumed_inner_stride_one:
+                    if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
+                        kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
+                                                 f"{ast.instruction_set['cachelineSize']}, 0);")
+                    else:
+                        kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
+                                                 f"{ast.instruction_set['bytes']}, 0);")
         elif param.is_field_stride:
             casted_stride = get_field_stride(param)
             type_str = param.symbol.dtype.base_name
-            kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, casted_stride))
+            kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {casted_stride};")
         elif param.is_field_shape:
             coord = param.symbol.coordinate
             field = param.fields[0]
             type_str = param.symbol.dtype.base_name
-            shape = "%s(%s)" % (type_str, get_end_coordinates(field)[coord])
+            shape = f"{type_str}({get_end_coordinates(field)[coord]})"
             assert coord < 3
-            max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord])
-            kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape))
-            kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape))
+            max_value = f"{field.name}->{('x', 'y', 'z')[coord]}SizeWithGhostLayer()"
+            kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({max_value}, {shape});")
+            kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {shape};")
             if ast.assumed_inner_stride_one and field.index_dimensions > 0:
-                kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,))
+                kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({field.name}->layout(), field::fzyx);")
+            if ast.instruction_set and ast.assumed_inner_stride_one:
+                if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
+                    kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
+                                             f"{ast.instruction_set['cachelineSize']}, 0);")
+                else:
+                    kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
+                                             f"{ast.instruction_set['bytes']}, 0);")
 
     call_parameters = ", ".join([p.symbol.name for p in ast_params])
 
diff --git a/src/field/Field.impl.h b/src/field/Field.impl.h
index a70bdf813d071293c4c20674ac307a4221db66c5..53e689c1186aa3643b6adda22684863e5da9be6d 100644
--- a/src/field/Field.impl.h
+++ b/src/field/Field.impl.h
@@ -316,7 +316,13 @@ namespace field {
       // Automatically select allocator if none was given
       if ( alloc == nullptr )
       {
-#ifdef __BIGGEST_ALIGNMENT__
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
+         const uint_t alignment = __ARM_FEATURE_SVE_BITS/8;
+#elif defined(__ARM_FEATURE_SVE)
+         const uint_t alignment = 64;
+#elif defined(__ARM_NEON)
+         const uint_t alignment = 16;
+#elif defined(__BIGGEST_ALIGNMENT__)
          const uint_t alignment = __BIGGEST_ALIGNMENT__;
 #elif defined(__AVX512F__)
          const uint_t alignment = 64;