From 7e6be86ea0ad6d80659b9c6637e35d3bfd3f69e0 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Thu, 6 May 2021 18:43:46 +0200
Subject: [PATCH] Add ARM SVE CI job

---
 .gitlab-ci.yml                                | 20 +++++++++++++++++--
 pystencils/cpu/vectorization.py               |  3 +++
 .../test_vectorization_specific.py            |  2 +-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9ae978824..2ed64515b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -151,12 +151,11 @@ ubuntu:
       cobertura: coverage.xml
       junit: report.xml
 
-arm64:
+arm64v8:
   extends: .multiarch_template
   image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
   variables:
     PYSTENCILS_SIMD: "neon"
-    QEMU_CPU: "cortex-a72"
   before_script:
     - *multiarch_before_script
     - sed -i s/march=native/march=armv8-a/g ~/.config/pystencils/config.json
@@ -170,6 +169,23 @@ ppc64le:
     - *multiarch_before_script
     - sed -i s/mcpu=native/mcpu=power8/g ~/.config/pystencils/config.json
 
+arm64v9:
+  # Compiler support for SVE is still pretty rough: GCC 10+11 produce incorrect code for fixed-width vectors,
+  # while Clang 12 produces memory-corrupting heisenbugs unless we enable the address sanitizer.
+  # In the RNG tests, GCC 10+11 produce an internal compiler error.
+  # The memory corruption seems to only happen with qemu-user, not with qemu-system.
+  # Once the compilers and QEMU have improved, this job should be cleaned up to match the others.
+  extends: .multiarch_template
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/arm64
+  variables:
+    PYSTENCILS_SIMD: "sve256,sve512"
+    ASAN_OPTIONS: detect_leaks=0
+    LD_PRELOAD: /usr/lib/aarch64-linux-gnu/libasan.so.6
+  before_script:
+    - *multiarch_before_script
+    - sed -i s/march=native/march=armv8-a+sve/g ~/.config/pystencils/config.json
+    - sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json
+
 minimal-conda:
   stage: test
   except:
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index c82c93588..6ab821f4e 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -69,6 +69,9 @@ class CachelineSize(ast.Node):
     def __eq__(self, other):
         return isinstance(other, CachelineSize)
 
+    def __hash__(self):
+        return hash(self.symbol)
+
 
 def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
               assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
index f579b4e46..16780f147 100644
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -117,7 +117,7 @@ def test_cacheline_size(instruction_set):
 
 # test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
 from pystencils_tests import test_vectorization
-@pytest.mark.parametrize('instruction_set', set(supported_instruction_sets) - set([test_vectorization.instruction_set]))
+@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - set([test_vectorization.instruction_set])))
 @pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
 def test_vectorization_other(instruction_set, function):
     test_vectorization.__dict__[function](instruction_set)
-- 
GitLab