test_vectorization.py 9.91 KB
Newer Older
1
2
import numpy as np
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
3

4
5
6
import pystencils as ps
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.vectorization import vectorize
7
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
Jan Hönig's avatar
Jan Hönig committed
8
from pystencils.enums import Target
9
10
from pystencils.transformations import replace_inner_stride_with_one

Markus Holzer's avatar
Markus Holzer committed
11
12
13
14
15
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
    instruction_set = supported_instruction_sets[-1]
else:
    instruction_set = None
16

Markus Holzer's avatar
Markus Holzer committed
17

18
def test_vector_type_propagation(instruction_set=instruction_set):
19
20
21
22
23
24
25
26
27
28
    a, b, c, d, e = sp.symbols("a b c d e")
    arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
    arr *= 10.0

    f, g = ps.fields(f=arr, g=arr)
    update_rule = [ps.Assignment(a, f[1, 0]),
                   ps.Assignment(b, a),
                   ps.Assignment(g[0, 0], b + 3 + f[0, 1])]

    ast = ps.create_kernel(update_rule)
Markus Holzer's avatar
Markus Holzer committed
29
    vectorize(ast, instruction_set=instruction_set)
30
31
32
33
34
35
36

    func = ast.compile()
    dst = np.zeros_like(arr)
    func(g=dst, f=arr)
    np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)


37
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
Markus Holzer's avatar
Markus Holzer committed
38
39
    domain_size = (24, 24)
    # create a datahandling object
Jan Hönig's avatar
Jan Hönig committed
40
    dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target=Target.CPU)
Markus Holzer's avatar
Markus Holzer committed
41

42
    # fields
43
    alignment = 'cacheline' if openmp else True
44
45
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
46
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
Markus Holzer's avatar
Markus Holzer committed
47
48
49
50
    dh.fill("f", 0.0, ghost_layers=True)
    opt = {'instruction_set': instruction_set, 'assume_aligned': True, 'nontemporal': True,
           'assume_inner_stride_one': True}
    update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))]
Jan Hönig's avatar
Jan Hönig committed
51
52
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)
Michael Kuron's avatar
Michael Kuron committed
53
54
55
56
57
58
59
60
    if instruction_set in ['sse'] or instruction_set.startswith('avx'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set in ['vsx']:
        assert 'storeAAndFlushCacheline' in ast.instruction_set
    for instruction in ['stream', 'streamFence', 'cachelineZero', 'storeAAndFlushCacheline', 'flushCacheline']:
61
62
        if instruction in ast.instruction_set:
            assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
Markus Holzer's avatar
Markus Holzer committed
63
64
65
66
67
    kernel = ast.compile()

    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))

68

69
70
def test_aligned_and_nt_stores_openmp(instruction_set=instruction_set):
    test_aligned_and_nt_stores(instruction_set, True)
71

Markus Holzer's avatar
Markus Holzer committed
72

73
def test_inplace_update(instruction_set=instruction_set):
74
75
76
77
78
79
80
81
82
83
84
85
86
87
    shape = (9, 9, 3)
    arr = np.ones(shape, order='f')

    @ps.kernel
    def update_rule(s):
        f = ps.fields("f(3) : [2D]", f=arr)
        s.tmp0 @= f(0)
        s.tmp1 @= f(1)
        s.tmp2 @= f(2)
        f0, f1, f2 = f(0), f(1), f(2)
        f0 @= 2 * s.tmp0
        f1 @= 2 * s.tmp0
        f2 @= 2 * s.tmp0

Jan Hönig's avatar
Jan Hönig committed
88
89
    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
    ast = ps.create_kernel(update_rule, config=config)
90
91
92
93
    kernel = ast.compile()
    kernel(f=arr)
    np.testing.assert_equal(arr, 2)

94

95
def test_vectorization_fixed_size(instruction_set=instruction_set):
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
    configurations = []
    # Fixed size - multiple of four
    arr = np.ones((20 + 2, 24 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - no multiple of four
    arr = np.ones((21 + 2, 25 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - different remainder
    arr = np.ones((23 + 2, 17 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))

    for arr, f, g in configurations:
        update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]

        ast = ps.create_kernel(update_rule)
Markus Holzer's avatar
Markus Holzer committed
114
        vectorize(ast, instruction_set=instruction_set)
115
116
117
118
119
120
121

        func = ast.compile()
        dst = np.zeros_like(arr)
        func(g=dst, f=arr)
        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)


122
def test_vectorization_variable_size(instruction_set=instruction_set):
123
124
125
126
127
    f, g = ps.fields("f, g : double[2D]")
    update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
    ast = ps.create_kernel(update_rule)

    replace_inner_stride_with_one(ast)
Markus Holzer's avatar
Markus Holzer committed
128
    vectorize(ast, instruction_set=instruction_set)
129
130
131
132
133
134
135
136
137
    func = ast.compile()

    arr = np.ones((23 + 2, 17 + 2)) * 5.0
    dst = np.zeros_like(arr)

    func(g=dst, f=arr)
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)


138
def test_piecewise1(instruction_set=instruction_set):
139
140
141
142
143
144
145
146
147
148
    a, b, c, d, e = sp.symbols("a b c d e")
    arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0

    f, g = ps.fields(f=arr, g=arr)
    update_rule = [ps.Assignment(a, f[1, 0]),
                   ps.Assignment(b, a),
                   ps.Assignment(c, f[0, 0] > 0.0),
                   ps.Assignment(g[0, 0], sp.Piecewise((b + 3 + f[0, 1], c), (0.0, True)))]

    ast = ps.create_kernel(update_rule)
Markus Holzer's avatar
Markus Holzer committed
149
    vectorize(ast, instruction_set=instruction_set)
150
151
152
153
154
155
    func = ast.compile()
    dst = np.zeros_like(arr)
    func(g=dst, f=arr)
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)


156
def test_piecewise2(instruction_set=instruction_set):
157
158
159
160
161
162
163
164
165
166
167
    arr = np.zeros((20, 20))

    @ps.kernel
    def test_kernel(s):
        f, g = ps.fields(f=arr, g=arr)

        s.condition @= f[0, 0] > 1
        s.result    @= 0.0 if s.condition else 1.0
        g[0, 0]     @= s.result

    ast = ps.create_kernel(test_kernel)
Markus Holzer's avatar
Markus Holzer committed
168
    vectorize(ast, instruction_set=instruction_set)
169
170
171
172
173
    func = ast.compile()
    func(f=arr, g=arr)
    np.testing.assert_equal(arr, np.ones_like(arr))


174
def test_piecewise3(instruction_set=instruction_set):
175
176
177
178
179
180
181
182
183
    arr = np.zeros((22, 22))

    @ps.kernel
    def test_kernel(s):
        f, g = ps.fields(f=arr, g=arr)
        s.b     @= f[0, 1]
        g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0

    ast = ps.create_kernel(test_kernel)
Markus Holzer's avatar
Markus Holzer committed
184
    vectorize(ast, instruction_set=instruction_set)
185
186
187
    ast.compile()


188
def test_logical_operators(instruction_set=instruction_set):
189
190
191
    arr = np.zeros((22, 22))

    @ps.kernel
192
    def kernel_and(s):
193
194
195
196
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

197
    ast = ps.create_kernel(kernel_and)
Markus Holzer's avatar
Markus Holzer committed
198
    vectorize(ast, instruction_set=instruction_set)
199
200
201
202
203
204
205
206
207
    ast.compile()

    @ps.kernel
    def kernel_or(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_or)
Markus Holzer's avatar
Markus Holzer committed
208
    vectorize(ast, instruction_set=instruction_set)
209
210
211
212
213
214
215
216
217
    ast.compile()

    @ps.kernel
    def kernel_equal(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Eq(f[0, 1], 2.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_equal)
Markus Holzer's avatar
Markus Holzer committed
218
    vectorize(ast, instruction_set=instruction_set)
219
220
221
222
    ast.compile()


def test_hardware_query():
Michael Kuron's avatar
Michael Kuron committed
223
    assert set(['sse', 'neon', 'sve', 'vsx', 'rvv']).intersection(supported_instruction_sets)
224
225


226
def test_vectorised_pow(instruction_set=instruction_set):
227
228
229
230
231
232
233
234
235
236
237
    arr = np.zeros((24, 24))
    f, g = ps.fields(f=arr, g=arr)

    as1 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 2))
    as2 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 0.5))
    as3 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -0.5))
    as4 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 4))
    as5 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -4))
    as6 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -1))

    ast = ps.create_kernel(as1)
Markus Holzer's avatar
Markus Holzer committed
238
    vectorize(ast, instruction_set=instruction_set)
239
240
241
    ast.compile()

    ast = ps.create_kernel(as2)
Markus Holzer's avatar
Markus Holzer committed
242
    vectorize(ast, instruction_set=instruction_set)
243
244
245
    ast.compile()

    ast = ps.create_kernel(as3)
Markus Holzer's avatar
Markus Holzer committed
246
    vectorize(ast, instruction_set=instruction_set)
247
248
249
    ast.compile()

    ast = ps.create_kernel(as4)
Markus Holzer's avatar
Markus Holzer committed
250
    vectorize(ast, instruction_set=instruction_set)
251
252
253
    ast.compile()

    ast = ps.create_kernel(as5)
Markus Holzer's avatar
Markus Holzer committed
254
    vectorize(ast, instruction_set=instruction_set)
255
256
257
    ast.compile()

    ast = ps.create_kernel(as6)
Markus Holzer's avatar
Markus Holzer committed
258
    vectorize(ast, instruction_set=instruction_set)
259
260
261
    ast.compile()


262
def test_vectorised_fast_approximations(instruction_set=instruction_set):
263
264
265
266
267
268
    arr = np.zeros((24, 24))
    f, g = ps.fields(f=arr, g=arr)

    expr = sp.sqrt(f[0, 0] + f[1, 0])
    assignment = ps.Assignment(g[0, 0], insert_fast_sqrts(expr))
    ast = ps.create_kernel(assignment)
Markus Holzer's avatar
Markus Holzer committed
269
    vectorize(ast, instruction_set=instruction_set)
270
271
272
273
274
    ast.compile()

    expr = f[0, 0] / f[1, 0]
    assignment = ps.Assignment(g[0, 0], insert_fast_divisions(expr))
    ast = ps.create_kernel(assignment)
Markus Holzer's avatar
Markus Holzer committed
275
    vectorize(ast, instruction_set=instruction_set)
276
277
278
279
    ast.compile()

    assignment = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
    ast = ps.create_kernel(insert_fast_sqrts(assignment))
Markus Holzer's avatar
Markus Holzer committed
280
    vectorize(ast, instruction_set=instruction_set)
281
    ast.compile()
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297


def test_issue40(*_):
    """https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40"""
    opt = {'instruction_set': "avx512", 'assume_aligned': False,
           'nontemporal': False, 'assume_inner_stride_one': True}

    src = ps.fields("src(1): double[2D]", layout='fzyx')
    eq = [ps.Assignment(sp.Symbol('rho'), 1.0),
          ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * sp.Symbol('rho'))]

    config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64')
    ast = ps.create_kernel(eq, config=config)

    code = ps.get_code_str(ast)
    assert 'epi32' not in code