test_random.py 9.89 KB
Newer Older
1
import numpy as np
Martin Bauer's avatar
Martin Bauer committed
2

Michael Kuron's avatar
Michael Kuron committed
3
4
import pytest

5
import pystencils as ps
6
from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles, random_symbol
7
8
9
10
11
12
13
14
15
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
from pystencils.cpu.cpujit import get_compiler_config
from pystencils.data_types import TypedSymbol

RNGs = {('philox', 'float'): PhiloxFourFloats, ('philox', 'double'): PhiloxTwoDoubles,
        ('aesni', 'float'): AESNIFourFloats, ('aesni', 'double'): AESNITwoDoubles}

instruction_sets = get_supported_instruction_sets()
if get_compiler_config()['os'] == 'windows':
Michael Kuron's avatar
Michael Kuron committed
16
    # skip instruction sets supported by the CPU but not by the compiler
17
    if 'avx' in instruction_sets and ('/arch:avx2' not in get_compiler_config()['flags'].lower()
Michael Kuron's avatar
Michael Kuron committed
18
                                      and '/arch:avx512' not in get_compiler_config()['flags'].lower()):
19
20
21
22
23
        instruction_sets.remove('avx')
    if 'avx512' in instruction_sets and '/arch:avx512' not in get_compiler_config()['flags'].lower():
        instruction_sets.remove('avx512')


Michael Kuron's avatar
Michael Kuron committed
24
@pytest.mark.parametrize('target,rng', (('cpu', 'philox'), ('cpu', 'aesni'), ('gpu', 'philox'), ('opencl', 'philox')))
25
26
27
@pytest.mark.parametrize('precision', ('float', 'double'))
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0), offset_values=None):
28
29
    if target == 'gpu':
        pytest.importorskip('pycuda')
Michael Kuron's avatar
Michael Kuron committed
30
31
32
33
    if target == 'opencl':
        pytest.importorskip('pyopencl')
        from pystencils.opencl.opencljit import init_globally
        init_globally()
Michael Kuron's avatar
Michael Kuron committed
34
    if instruction_sets and set(['neon', 'sve', 'vsx', 'rvv']).intersection(instruction_sets) and rng == 'aesni':
Michael Kuron's avatar
Michael Kuron committed
35
        pytest.xfail('AES not yet implemented for this architecture')
36
37
38
39
    if rng == 'aesni' and len(keys) == 2:
        keys *= 2
    if offset_values is None:
        offset_values = offsets
Michael Kuron's avatar
Michael Kuron committed
40

41
    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
42
43
44
    f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
                     dtype=np.float32 if dtype == 'float' else np.float64)
    dh.fill(f.name, 42.0)
45

46
47
    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets, keys=keys)
    assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
48
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
49

50
    dh.all_to_gpu()
51
52
53
54
    kwargs = {'time_step': t}
    if offset_values != offsets:
        kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
    dh.run_kernel(kernel, **kwargs)
55
    dh.all_to_cpu()
56
    arr = dh.gather_array(f.name)
57
    assert np.logical_and(arr <= 1.0, arr >= 0).all()
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
    if rng == 'philox' and t == 124 and offsets == (0, 0) and keys == (0, 0) and dh.shape == (2, 2):
        int_reference = np.array([[[3576608082, 1252663339, 1987745383, 348040302],
                                   [1032407765, 970978240, 2217005168, 2424826293]],
                                  [[2958765206, 3725192638, 2623672781, 1373196132],
                                   [850605163, 1694561295, 3285694973, 2799652583]]])
    else:
        pytest.importorskip('randomgen')
        if rng == 'aesni':
            from randomgen import AESCounter
            int_reference = np.empty(dh.shape + (4,), dtype=int)
            for x in range(dh.shape[0]):
                for y in range(dh.shape[1]):
                    r = AESCounter(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64,
                                   key=keys[0] + keys[1] * 2 ** 32 + keys[2] * 2 ** 64 + keys[3] * 2 ** 96,
                                   mode="sequence")
                    a, b = r.random_raw(size=2)
                    int_reference[x, y, :] = [a % 2 ** 32, a // 2 ** 32, b % 2 ** 32, b // 2 ** 32]
        else:
            from randomgen import Philox
            int_reference = np.empty(dh.shape + (4,), dtype=int)
            for x in range(dh.shape[0]):
                for y in range(dh.shape[1]):
Markus Holzer's avatar
Markus Holzer committed
81
                    r = Philox(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64 - 1,
82
                               key=keys[0] + keys[1] * 2 ** 32, number=4, width=32, mode="sequence")
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
                    int_reference[x, y, :] = r.random_raw(size=4)

    if precision == 'float' or dtype == 'float':
        eps = np.finfo(np.float32).eps
    else:
        eps = np.finfo(np.float64).eps
    if rng == 'aesni':  # precision appears to be slightly worse
        eps = max(1e-12, 2 * eps)

    if precision == 'float':
        reference = int_reference * 2. ** -32 + 2. ** -33
    else:
        x = int_reference[:, :, 0::2]
        y = int_reference[:, :, 1::2]
        z = x ^ y << (53 - 32)
        reference = z * 2. ** -53 + 2. ** -54
    assert np.allclose(arr, reference, rtol=0, atol=eps)


@pytest.mark.parametrize('vectorized', (False, True))
@pytest.mark.parametrize('kind', ('value', 'symbol'))
def test_rng_offsets(kind, vectorized):
    if vectorized:
        test = test_rng_vectorized
        if not instruction_sets:
            pytest.skip("cannot detect CPU instruction set")
    else:
        test = test_rng
    if kind == 'value':
Michael Kuron's avatar
Michael Kuron committed
112
        test(instruction_sets[-1] if vectorized else 'cpu', 'philox', 'float', 'float', t=8,
113
114
115
             offsets=(6, 7), keys=(5, 309))
    elif kind == 'symbol':
        offsets = (TypedSymbol("x0", np.uint32), TypedSymbol("y0", np.uint32))
Michael Kuron's avatar
Michael Kuron committed
116
        test(instruction_sets[-1] if vectorized else 'cpu', 'philox', 'float', 'float', t=8,
117
118
119
120
121
122
123
             offsets=offsets, offset_values=(6, 7), keys=(5, 309))


@pytest.mark.parametrize('target', instruction_sets)
@pytest.mark.parametrize('rng', ('philox', 'aesni'))
@pytest.mark.parametrize('precision,dtype', (('float', 'float'), ('double', 'double')))
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
Michael Kuron's avatar
Michael Kuron committed
124
    if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
Michael Kuron's avatar
Michael Kuron committed
125
        pytest.xfail('AES not yet implemented for this architecture')
126
127
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}

Michael Kuron's avatar
Michael Kuron committed
128
    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target='cpu')
129
130
131
132
133
134
135
    f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
                     dtype=np.float32 if dtype == 'float' else np.float64, alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)

    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
136
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
137

138
139
140
141
    kwargs = {'time_step': t}
    if offset_values is not None:
        kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
    dh.run_kernel(kernel, **kwargs)
Michael Kuron's avatar
Michael Kuron committed
142

143
144
145
    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
    kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
Michael Kuron's avatar
Michael Kuron committed
146

147
    dh.run_kernel(kernel, **kwargs)
Michael Kuron's avatar
Michael Kuron committed
148

149
150
    ref_data = dh.gather_array(ref.name)
    data = dh.gather_array(f.name)
Michael Kuron's avatar
Michael Kuron committed
151

152
    assert np.allclose(ref_data, data)
Michael Kuron's avatar
Michael Kuron committed
153

154

155
156
157
158
159
160
161
162
@pytest.mark.parametrize('vectorized', (False, True))
def test_rng_symbol(vectorized):
    """Make sure that the RNG symbol generator generates symbols and that the resulting code compiles"""
    if vectorized:
        if not instruction_sets:
            pytest.skip("cannot detect CPU instruction set")
        else:
            cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True,
Michael Kuron's avatar
Michael Kuron committed
163
                                  'instruction_set': instruction_sets[-1]}
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
    else:
        cpu_vectorize_info = None
    
    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target="cpu")
    f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
    ac = ps.AssignmentCollection([ps.Assignment(f(i), 0) for i in range(f.shape[-1])])
    rng_symbol_gen = random_symbol(ac.subexpressions, dim=dh.dim)
    for i in range(f.shape[-1]):
        ac.main_assignments[i] = ps.Assignment(ac.main_assignments[i].lhs, next(rng_symbol_gen))
    symbols = [a.rhs for a in ac.main_assignments]
    assert len(symbols) == f.shape[-1] and len(set(symbols)) == f.shape[-1]
    ps.create_kernel(ac, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()


@pytest.mark.parametrize('vectorized', (False, True))
def test_staggered(vectorized):
180
    """Make sure that the RNG counter can be substituted during loop cutting"""
181

182
183
184
    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target="cpu")
    j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)
    a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil])
Michael Kuron's avatar
Michael Kuron committed
185
    rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
186
187
    a.main_assignments[0] = ps.Assignment(a.main_assignments[0].lhs, next(rng_symbol_gen))
    kernel = ps.create_staggered_kernel(a, target=dh.default_target).compile()
188
189
190
191
192
193
194

    if not vectorized:
        return
    if not instruction_sets:
        pytest.skip("cannot detect CPU instruction set")
    pytest.importorskip('islpy')
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': False,
Michael Kuron's avatar
Michael Kuron committed
195
                          'instruction_set': instruction_sets[-1]}
196
197
198
199
200
201
202
203
204
205
206
207
    
    dh.fill(j.name, 867)
    dh.run_kernel(kernel, seed=5, time_step=309)
    ref_data = dh.gather_array(j.name)
    
    kernel2 = ps.create_staggered_kernel(a, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()

    dh.fill(j.name, 867)
    dh.run_kernel(kernel2, seed=5, time_step=309)
    data = dh.gather_array(j.name)
    
    assert np.allclose(ref_data, data)