diff --git a/pystencils/backends/arm_instruction_sets.py b/pystencils/backends/arm_instruction_sets.py
index 1f2f51cd6154a63beab7c4b6939e8f41b6ad976d..6c388f3e4cb798297f1902b06ef82b0909d191a5 100644
--- a/pystencils/backends/arm_instruction_sets.py
+++ b/pystencils/backends/arm_instruction_sets.py
@@ -33,21 +33,21 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
         '<': 'clt[0, 1]',
         '>=': 'cge[0, 1]',
         '>': 'cgt[0, 1]',
-        # '&': 'and[0, 1]', -> only for integer values available
-        # '|': 'orr[0, 1]'
-
     }
 
     bits = {'double': 64,
-            'float': 32}
+            'float': 32,
+            'int': 32}
 
     if q_registers is True:
         q_reg = 'q'
         width = 128 // bits[data_type]
+        intwidth = 128 // bits['int']  # integer lane count depends on the int width, not on data_type
         suffix = f'q_f{bits[data_type]}'
     else:
         q_reg = ''
         width = 64 // bits[data_type]
+        intwidth = 64 // bits['int']  # integer lane count depends on the int width, not on data_type
         suffix = f'_f{bits[data_type]}'
 
     result = dict()
@@ -60,16 +60,26 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon', q
 
         result[intrinsic_id] = 'v' + name + suffix + arg_string
 
-    result['makeVecConst'] = 'vdup' + q_reg + '_n_f' + str(bits[data_type]) + '({0})'
-    result['makeVec'] = 'vdup' + q_reg + '_n_f' + str(bits[data_type]) + '({0})'
+    result['makeVecConst'] = f'vdup{q_reg}_n_f{bits[data_type]}' + '({0})'
+    result['makeVec'] = f'vdup{q_reg}_n_f{bits[data_type]}' + '({0})'
+    result['makeVecConstInt'] = f'vdup{q_reg}_n_s{bits["int"]}' + '({0})'
+    result['makeVecInt'] = f'vdup{q_reg}_n_s{bits["int"]}' + '({0})'
+
+    result['+int'] = f"vadd{q_reg}_s{bits['int']}" + "({0}, {1})"  # honour q_reg like every other intrinsic here
 
     result['rsqrt'] = None
 
     result['width'] = width
-    result['double'] = 'float64x' + str(width) + '_t'
-    result['float'] = 'float32x' + str(width * 2) + '_t'
+    result['intwidth'] = intwidth
+    result[data_type] = f'float{bits[data_type]}x{width}_t'
+    result['int'] = f'int{bits["int"]}x{intwidth}_t'  # NEON type names are <base><bits>x<lanes>_t
+    result['bool'] = f'uint{bits[data_type]}x{width}_t'
     result['headers'] = ['<arm_neon.h>']
 
-    result['!='] = 'vmvnq_u%d(%s)' % (bits[data_type], result['=='])
+    result['!='] = f'vmvn{q_reg}_u{bits[data_type]}({result["=="]})'
+
+    result['&'] = f'vand{q_reg}_u{bits[data_type]}' + '({0}, {1})'
+    result['|'] = f'vorr{q_reg}_u{bits[data_type]}' + '({0}, {1})'
+    result['blendv'] = f'vbsl{q_reg}_f{bits[data_type]}' + '({2}, {1}, {0})'
 
     return result
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index 44284d6bbbfd3fec99306408c22c5d34493eb508..3bcbaee1be5bdf9ac9d5c50bc9e0d109f4a21b3f 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -1,3 +1,5 @@
+import platform
+
 from pystencils.backends.x86_instruction_sets import get_vector_instruction_set_x86
 from pystencils.backends.arm_instruction_sets import get_vector_instruction_set_arm
 
@@ -11,6 +13,8 @@ def get_vector_instruction_set(data_type='double', instruction_set='avx', q_regi
 
 def get_supported_instruction_sets():
     """List of supported instruction sets on current hardware, or None if query failed."""
+    if platform.system() == 'Darwin' and platform.machine() == 'arm64':  # not supported by cpuinfo
+        return ['neon']
     try:
         from cpuinfo import get_cpu_info
     except ImportError:
diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py
index 84908dcb29b38fc89a1b381d1491fbde59a09db7..ab4f8f84e628577d0b2343744e74954b541d4ce0 100644
--- a/pystencils/cpu/cpujit.py
+++ b/pystencils/cpu/cpujit.py
@@ -171,6 +171,13 @@ def read_config():
             ('flags', '-Ofast -DNDEBUG -fPIC -march=native -Xclang -fopenmp -std=c++11'),
             ('restrict_qualifier', '__restrict__')
         ])
+        if platform.machine() == 'arm64':
+            default_compiler_config['flags'] = default_compiler_config['flags'].replace('-march=native ', '')
+        for libomp in ['/opt/local/lib/libomp/libomp.dylib', '/usr/local/lib/libomp.dylib',
+                       '/opt/homebrew/lib/libomp.dylib']:
+            if os.path.exists(libomp):
+                default_compiler_config['flags'] += ' ' + libomp
+                break
     default_cache_config = OrderedDict([
         ('object_cache', os.path.join(user_cache_dir('pystencils'), 'objectcache')),
         ('clear_cache_on_start', False),
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 51fa1a807db17b8ba58fda8bf080658015b33ef8..0af12adaf81584c93bff037705c1edcf67bbab93 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -6,7 +6,7 @@ import sympy as sp
 from sympy.logic.boolalg import BooleanFunction
 
 import pystencils.astnodes as ast
-from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
 from pystencils.data_types import (
     PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression, vector_memory_access)
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
@@ -26,7 +26,7 @@ class vec_all(sp.Function):
     nargs = (1,)
 
 
-def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
+def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
               assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
               assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True):
     """Explicit vectorization using SIMD vectorization via intrinsics.
@@ -51,6 +51,11 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
                                         depending on the access pattern there might be additional padding
                                         required at the end of the array
     """
+    if instruction_set == 'best':
+        # Query the hardware once and reuse the result; it is None when the query failed.
+        # NOTE(review): taking [-1] presumes the list is ordered weakest to strongest - confirm.
+        supported_sets = get_supported_instruction_sets()
+        instruction_set = supported_sets[-1] if supported_sets else 'avx'
     if instruction_set is None:
         return
 
diff --git a/pystencils/include/philox_rand.h b/pystencils/include/philox_rand.h
index b4c83669d0fda05aee1a7e018904c927a59c7ad1..2950717738d93f19410dfcc17786a687b8ee8226 100644
--- a/pystencils/include/philox_rand.h
+++ b/pystencils/include/philox_rand.h
@@ -1,11 +1,11 @@
 #include <cstdint>
 
-#if defined(__SSE4_1__) || defined(_MSC_VER)
+#if defined(__SSE2__) || defined(_MSC_VER)
 #include <emmintrin.h> // SSE2
 #endif
 #ifdef __AVX2__
 #include <immintrin.h> // AVX*
-#else
+#elif defined(__SSE4_1__) || defined(_MSC_VER)
 #include <smmintrin.h>  // SSE4
 #ifdef __FMA__
 #include <immintrin.h> // FMA
diff --git a/pystencils_tests/test_basic_usage_llvm.ipynb b/pystencils_tests/test_basic_usage_llvm.ipynb
index 4b7f843410cee16c39ee96c106ebbf1a7f9ac4ac..623bad6601f64a3f26521df74bb747359ee207ff 100644
--- a/pystencils_tests/test_basic_usage_llvm.ipynb
+++ b/pystencils_tests/test_basic_usage_llvm.ipynb
@@ -10,6 +10,16 @@
     "In this example a simple weighted Jacobi kernel is generated, so the focus remains on the part of LLVM generation."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pytest\n",
+    "pytest.importorskip('llvmlite')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/pystencils_tests/test_conditional_vec.py b/pystencils_tests/test_conditional_vec.py
index d3fc3dc66b4254e4b78a7699ce23316afb830132..5f22a230d501f1df26d605beba6fad5d1d251971 100644
--- a/pystencils_tests/test_conditional_vec.py
+++ b/pystencils_tests/test_conditional_vec.py
@@ -1,11 +1,15 @@
 import numpy as np
 import sympy as sp
+import pytest
 
 import pystencils as ps
 from pystencils.astnodes import Block, Conditional
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
 from pystencils.cpu.vectorization import vec_all, vec_any
 
 
+@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
+@pytest.mark.skipif('neon' in (get_supported_instruction_sets() or []), reason='ARM has no collective instructions')
 def test_vec_any():
     data_arr = np.zeros((15, 15))
 
@@ -19,12 +23,14 @@ def test_vec_any():
         ]))
     ]
     ast = ps.create_kernel(c, target='cpu',
-                           cpu_vectorize_info={'instruction_set': 'avx'})
+                           cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1]})
     kernel = ast.compile()
     kernel(data=data_arr)
     np.testing.assert_equal(data_arr[3:9, 0:8], 2.0)
 
 
+@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
+@pytest.mark.skipif('neon' in (get_supported_instruction_sets() or []), reason='ARM has no collective instructions')
 def test_vec_all():
     data_arr = np.zeros((15, 15))
 
@@ -37,13 +43,14 @@ def test_vec_all():
         ]))
     ]
     ast = ps.create_kernel(c, target='cpu',
-                           cpu_vectorize_info={'instruction_set': 'avx'})
+                           cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1]})
     kernel = ast.compile()
     before = data_arr.copy()
     kernel(data=data_arr)
     np.testing.assert_equal(data_arr, before)
 
 
+@pytest.mark.skipif(not get_supported_instruction_sets(), reason='cannot detect CPU instruction set')
 def test_boolean_before_loop():
     t1, t2 = sp.symbols('t1, t2')
     f_arr = np.ones((10, 10))
@@ -55,7 +62,7 @@ def test_boolean_before_loop():
         ps.Assignment(g[0, 0],
                       sp.Piecewise((f[0, 0], t1), (42, True)))
     ]
-    ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': 'avx'})
+    ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1]})
     kernel = ast.compile()
     kernel(f=f_arr, g=g_arr, t2=1.0)
     print(g)
diff --git a/pystencils_tests/test_dot_printer.ipynb b/pystencils_tests/test_dot_printer.ipynb
index 8b61525f520fddfdc37ac028241c897ffd8c02fd..67c0e14a947167b13ba012cc71fa6d46841f9aba 100644
--- a/pystencils_tests/test_dot_printer.ipynb
+++ b/pystencils_tests/test_dot_printer.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pytest\n",
+    "pytest.importorskip('graphviz')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/pystencils_tests/test_field_coordinates.py b/pystencils_tests/test_field_coordinates.py
index 1aeed343bc0c77d6e6b56b3d48e68c100be3f56c..5d95a308ca5829ba4d5f4ffc4f1022334ac5fd45 100644
--- a/pystencils_tests/test_field_coordinates.py
+++ b/pystencils_tests/test_field_coordinates.py
@@ -27,7 +27,7 @@ try:
     import skimage.io
     lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float32)
 except Exception:
-    lenna = np.random.rand(20, 30)
+    lenna = np.random.rand(20, 30).astype(np.float32)
 
 
 def test_rotate_center():
diff --git a/pystencils_tests/test_fvm.py b/pystencils_tests/test_fvm.py
index c289ac54e7492cef31d74867847e2aefb2663d67..c863c8f45fdb31386eda547f6774b16e864879b5 100644
--- a/pystencils_tests/test_fvm.py
+++ b/pystencils_tests/test_fvm.py
@@ -3,7 +3,6 @@ import pystencils as ps
 import numpy as np
 import pytest
 from itertools import product
-from scipy.optimize import curve_fit
 
 
 def advection_diffusion(dim: int):
@@ -94,6 +93,8 @@ def advection_diffusion(dim: int):
             calc_density = density(pos - velocity * time, time, D)
             target = [time, D]
         
+            pytest.importorskip('scipy.optimize')
+            from scipy.optimize import curve_fit
             popt, _ = curve_fit(lambda x, t, D: density(x - velocity * time, t, D),
                                 pos.reshape(-1, dim),
                                 sim_density.reshape(-1),
diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py
index 856bfd2c5af53f7c01476136173439931b5a347a..30b55a66be082454e48de57284e1ed9678e6e19e 100644
--- a/pystencils_tests/test_random.py
+++ b/pystencils_tests/test_random.py
@@ -175,7 +175,7 @@ def test_staggered(vectorized):
     dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target="cpu")
     j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)
     a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil])
-    rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=AESNITwoDoubles)
+    rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
     a.main_assignments[0] = ps.Assignment(a.main_assignments[0].lhs, next(rng_symbol_gen))
     kernel = ps.create_staggered_kernel(a, target=dh.default_target).compile()
 
diff --git a/pystencils_tests/test_small_block_benchmark.ipynb b/pystencils_tests/test_small_block_benchmark.ipynb
index 45ab56bbe194952bfbadf937be43952a9ed4a43a..81101c5a0d33e45300300ab24c70c4c464eb5eac 100644
--- a/pystencils_tests/test_small_block_benchmark.ipynb
+++ b/pystencils_tests/test_small_block_benchmark.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pytest\n",
+    "pytest.importorskip('waLBerla')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 7b1891a4def311f20ae4a7f7932f83a5b0c821e2..40432f5b6b8e6a17dd6c03ef25d9c9800dfa8a45 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -203,7 +203,7 @@ def test_logical_operators():
 
 def test_hardware_query():
     instruction_sets = get_supported_instruction_sets()
-    assert 'sse' in instruction_sets
+    assert 'sse' in instruction_sets or 'neon' in instruction_sets
 
 
 def test_vectorised_pow():