From 78c2cbc0cdc4d5fc8371501d20df363eac115739 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Tue, 4 May 2021 22:40:04 +0200
Subject: [PATCH] Add CI job for non-x86 vectorization

---
 .gitlab-ci.yml                               | 46 ++++++++++++++++++++
 pystencils/backends/simd_instruction_sets.py |  3 ++
 pystencils/include/arm_neon_helpers.h        |  2 +-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6cc63ae35..9ae978824 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -124,6 +124,52 @@ ubuntu:
     reports:
       junit: report.xml
 
+.multiarch_template:  # hidden job (leading dot): shared base for the arm64 and ppc64le jobs
+  stage: test
+  except:
+    variables:
+      - $ENABLE_NIGHTLY_BUILDS  # job is excluded from pipelines where this variable is set
+  before_script: &multiarch_before_script  # anchored so jobs that override before_script can re-include it
+    - python3 -c "import pystencils as ps; ps.cpu.cpujit.read_config()"  # presumably creates the default config.json edited by the per-arch jobs - TODO confirm
+    - sed -i '/^fail_under.*/d' pytest.ini  # delete any fail_under line from pytest.ini
+  script:
+    - export NUM_CORES=$(nproc --all)
+    - mkdir -p ~/.config/matplotlib
+    - echo "backend:template" > ~/.config/matplotlib/matplotlibrc  # select matplotlib's "template" backend
+    - sed -i 's/--doctest-modules //g' pytest.ini  # strip the --doctest-modules option from pytest.ini
+    - env  # log the environment for debugging
+    - pip3 list  # log installed package versions
+    - pytest-3 -v -n $NUM_CORES --cov-report html --cov-report term --cov=. --junitxml=report.xml pystencils_tests/test_*vec*.py pystencils_tests/test_random.py  # only the vectorization and random tests
+    - python3 -m coverage xml  # export coverage as XML for the cobertura report below
+  tags:
+    - docker
+  artifacts:
+    when: always  # upload artifacts even if the job fails
+    paths:
+      - coverage_report
+    reports:
+      cobertura: coverage.xml
+      junit: report.xml
+
+arm64:  # vectorization tests with the NEON instruction set
+  extends: .multiarch_template
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
+  variables:
+    PYSTENCILS_SIMD: "neon"  # read by get_supported_instruction_sets() in place of auto-detection
+    QEMU_CPU: "cortex-a72"  # presumably the CPU model for QEMU user-mode emulation on the runner - TODO confirm
+  before_script:  # replaces the template's before_script, hence the alias on the next line
+    - *multiarch_before_script
+    - sed -i s/march=native/march=armv8-a/g ~/.config/pystencils/config.json  # replace -march=native with a fixed target in the pystencils config
+
+ppc64le:  # vectorization tests with the VSX instruction set
+  extends: .multiarch_template
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ppc64le
+  variables:
+    PYSTENCILS_SIMD: "vsx"  # read by get_supported_instruction_sets() in place of auto-detection
+  before_script:  # replaces the template's before_script, hence the alias on the next line
+    - *multiarch_before_script
+    - sed -i s/mcpu=native/mcpu=power8/g ~/.config/pystencils/config.json  # replace -mcpu=native with a fixed target in the pystencils config
+
 minimal-conda:
   stage: test
   except:
diff --git a/pystencils/backends/simd_instruction_sets.py b/pystencils/backends/simd_instruction_sets.py
index b552da0e9..0b982814a 100644
--- a/pystencils/backends/simd_instruction_sets.py
+++ b/pystencils/backends/simd_instruction_sets.py
@@ -1,4 +1,5 @@
 import math
+import os
 import platform
 from ctypes import CDLL
 
@@ -25,6 +26,8 @@ def get_supported_instruction_sets():
     global _cache
     if _cache is not None:
         return _cache.copy()
+    if 'PYSTENCILS_SIMD' in os.environ:
+        return os.environ['PYSTENCILS_SIMD'].split(',')
     if platform.system() == 'Darwin' and platform.machine() == 'arm64':  # not supported by cpuinfo
         return ['neon']
     elif platform.machine().startswith('ppc64'):  # no flags reported by cpuinfo
diff --git a/pystencils/include/arm_neon_helpers.h b/pystencils/include/arm_neon_helpers.h
index a900001e7..a27b8ff6f 100644
--- a/pystencils/include/arm_neon_helpers.h
+++ b/pystencils/include/arm_neon_helpers.h
@@ -32,7 +32,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
 #endif
 
 inline void cachelineZero(void * p) {
-	__asm__ volatile("dc zva, %0"::"r"(p));
+	__asm__ volatile("dc zva, %0"::"r"(p):"memory"); // "memory" clobber: DC ZVA zeroes memory at p, which the compiler cannot see from the operands
 }
 
 inline size_t _cachelineSize() {
-- 
GitLab