diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6cc63ae3522001701bd428075240eb6538048c6a..9ae978824d56fcf19fb2c2a216cc170a77e1d0fb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -124,6 +124,52 @@ ubuntu:
     reports:
       junit: report.xml
 
+.multiarch_template:  # hidden job (leading dot): shared base that arm64 and ppc64le pull in via `extends`
+  stage: test
+  except:
+    variables:
+      - $ENABLE_NIGHTLY_BUILDS  # job is skipped when this variable is set
+  before_script: &multiarch_before_script  # anchored so jobs overriding before_script can re-include these steps
+    - python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"  # presumably creates ~/.config/pystencils/config.json, which the arch jobs patch - confirm
+    - sed -i '/^fail_under.*/d' pytest.ini  # delete lines starting with fail_under; presumably a coverage threshold this partial run cannot meet - confirm
+  script:  # runs only test_*vec*.py and test_random.py, with coverage and JUnit output
+    - export NUM_CORES=$(nproc --all)  # worker count passed to pytest -n below
+    - mkdir -p ~/.config/matplotlib
+    - echo "backend:template" > ~/.config/matplotlib/matplotlibrc  # write a matplotlibrc selecting the "template" backend
+    - sed -i 's/--doctest-modules //g' pytest.ini  # strip the --doctest-modules option from pytest.ini
+    - env  # log the environment for debugging
+    - pip3 list  # log installed package versions
+    - pytest-3 -v -n $NUM_CORES --cov-report html --cov-report term --cov=. --junitxml=report.xml pystencils_tests/test_*vec*.py pystencils_tests/test_random.py
+    - python3 -m coverage xml  # export the collected coverage data as XML (reported as coverage.xml below)
+  tags:
+    - docker
+  artifacts:
+    when: always  # keep reports even when the tests fail
+    paths:
+      - coverage_report  # presumably the HTML coverage output directory - confirm against the coverage config
+    reports:
+      cobertura: coverage.xml
+      junit: report.xml  # file written by --junitxml in the pytest call
+
+arm64:  # vectorization/random tests on the arm64 image; inherits everything else from .multiarch_template
+  extends: .multiarch_template
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
+  variables:
+    PYSTENCILS_SIMD: "neon"  # comma-separated list returned as-is by get_supported_instruction_sets() when set
+    QEMU_CPU: "cortex-a72"  # presumably the CPU model used by QEMU emulation on the runner - confirm
+  before_script:  # replaces the template's before_script, so the shared steps are re-included via the alias
+    - *multiarch_before_script  # alias yields a nested list; relies on GitLab flattening nested script arrays
+    - sed -i s/march=native/march=armv8-a/g ~/.config/pystencils/config.json  # swap march=native for a fixed armv8-a target in the config
+
+ppc64le:  # vectorization/random tests on the ppc64le image; inherits everything else from .multiarch_template
+  extends: .multiarch_template
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
+  variables:
+    PYSTENCILS_SIMD: "vsx"  # comma-separated list returned as-is by get_supported_instruction_sets() when set
+  before_script:  # replaces the template's before_script, so the shared steps are re-included via the alias
+    - *multiarch_before_script  # alias yields a nested list; relies on GitLab flattening nested script arrays
+    - sed -i s/mcpu=native/mcpu=power8/g ~/.config/pystencils/config.json  # swap mcpu=native for a fixed power8 target in the config
+
 minimal-conda:
   stage: test
   except:
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index b552da0e9ac263721dfbf262c40c8cbb00352f7a..0b982814ad2e9b9379b71fface4f361d49696f65 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -1,4 +1,5 @@
 import math
+import os
 import platform
 from ctypes import CDLL
 
@@ -25,6 +26,8 @@ def get_supported_instruction_sets():
     global _cache
     if _cache is not None:
         return _cache.copy()
+    if 'PYSTENCILS_SIMD' in os.environ:
+        return os.environ['PYSTENCILS_SIMD'].split(',')
     if platform.system() == 'Darwin' and platform.machine() == 'arm64':  # not supported by cpuinfo
         return ['neon']
     elif platform.machine().startswith('ppc64'):  # no flags reported by cpuinfo
diff --git a/pystencils/include/arm_neon_helpers.h b/pystencils/include/arm_neon_helpers.h
index a900001e793392fea66faf427873ce49eb2594d4..a27b8ff6fa9e7244a8a0467315ed06d3985ed7b6 100644
--- a/pystencils/include/arm_neon_helpers.h
+++ b/pystencils/include/arm_neon_helpers.h
@@ -32,7 +32,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
 #endif
 
 inline void cachelineZero(void * p) {
-	__asm__ volatile("dc zva, %0"::"r"(p));
+	__asm__ volatile("dc zva, %0"::"r"(p):"memory");  // "memory" clobber: dc zva writes memory behind the compiler's back, so it must not cache or reorder accesses across this asm
 }
 
 inline size_t _cachelineSize() {