test_cudagpu.py 6.13 KB
Newer Older
1
2
import numpy as np
import sympy as sp
3
from pystencils import Field, Assignment, fields
4
5
6
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.gpucuda.indexing import LineIndexing
from pystencils.slicing import remove_ghost_layers, add_ghost_layers, make_slice
7
from pystencils.gpucuda import make_python_function, create_cuda_kernel, BlockIndexing
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pycuda.gpuarray as gpuarray
from scipy.ndimage import convolve


def test_averaging_kernel():
    size = (40, 55)
    src_arr = np.random.rand(*size)
    src_arr = add_ghost_layers(src_arr)
    dst_arr = np.zeros_like(src_arr)
    src_field = Field.create_from_numpy_array('src', src_arr)
    dst_field = Field.create_from_numpy_array('dst', dst_arr)

    update_rule = Assignment(dst_field[0, 0],
                             (src_field[0, 1] + src_field[0, -1] + src_field[1, 0] + src_field[-1, 0]) / 4)

    ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    stencil = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4.0
    reference = convolve(remove_ghost_layers(src_arr), stencil, mode='constant', cval=0.0)
    reference = add_ghost_layers(reference)
    np.testing.assert_almost_equal(reference, dst_arr)


def test_variable_sized_fields():
    src_field = Field.create_generic('src', spatial_dimensions=2)
    dst_field = Field.create_generic('dst', spatial_dimensions=2)

    update_rule = Assignment(dst_field[0, 0],
                             (src_field[0, 1] + src_field[0, -1] + src_field[1, 0] + src_field[-1, 0]) / 4)

    ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
    kernel = make_python_function(ast)

    size = (3, 3)
    src_arr = np.random.rand(*size)
    src_arr = add_ghost_layers(src_arr)
    dst_arr = np.zeros_like(src_arr)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    stencil = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4.0
    reference = convolve(remove_ghost_layers(src_arr), stencil, mode='constant', cval=0.0)
    reference = add_ghost_layers(reference)
    np.testing.assert_almost_equal(reference, dst_arr)


def test_multiple_index_dimensions():
    """Sums along the last axis of a numpy array"""
    src_size = (7, 6, 4)
    dst_size = src_size[:2]
    src_arr = np.asfortranarray(np.random.rand(*src_size))
    dst_arr = np.zeros(dst_size)

    src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=1)
    dst_field = Field.create_from_numpy_array('dst', dst_arr, index_dimensions=0)

    offset = (-2, -1)
    update_rule = Assignment(dst_field[0, 0],
                             sum([src_field[offset[0], offset[1]](i) for i in range(src_size[-1])]))

    ast = create_cuda_kernel([update_rule])
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    reference = np.zeros_like(dst_arr)
    gl = np.max(np.abs(np.array(offset, dtype=int)))
    for x in range(gl, src_size[0]-gl):
        for y in range(gl, src_size[1]-gl):
            reference[x, y] = sum([src_arr[x+offset[0], y+offset[1], i] for i in range(src_size[2])])

    np.testing.assert_almost_equal(reference, dst_arr)


def test_ghost_layer():
    size = (6, 5)
    src_arr = np.ones(size)
    dst_arr = np.zeros_like(src_arr)
    src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=0)
    dst_field = Field.create_from_numpy_array('dst', dst_arr, index_dimensions=0)

    update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
    ghost_layers = [(1, 2), (2, 1)]
    ast = create_cuda_kernel([update_rule], ghost_layers=ghost_layers, indexing_creator=LineIndexing)
    kernel = make_python_function(ast)

    gpu_src_arr = gpuarray.to_gpu(src_arr)
    gpu_dst_arr = gpuarray.to_gpu(dst_arr)
    kernel(src=gpu_src_arr, dst=gpu_dst_arr)
    gpu_dst_arr.get(dst_arr)

    reference = np.zeros_like(src_arr)
    reference[ghost_layers[0][0]:-ghost_layers[0][1], ghost_layers[1][0]:-ghost_layers[1][1]] = 1
    np.testing.assert_equal(reference, dst_arr)


def test_setting_value():
    arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5)
    arr_gpu = gpuarray.to_gpu(arr_cpu)

    iteration_slice = make_slice[:, :]
    f = Field.create_generic("f", 2)
    update_rule = [Assignment(f(0), sp.Symbol("value"))]
    ast = create_cuda_kernel(update_rule, iteration_slice=iteration_slice, indexing_creator=LineIndexing)

    kernel = make_python_function(ast)
    kernel(f=arr_gpu, value=np.float64(42.0))
    np.testing.assert_equal(arr_gpu.get(), np.ones((5, 5)) * 42.0)


def test_periodicity():
    from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as periodic_gpu
    from pystencils.slicing import get_periodic_boundary_functor as periodic_cpu

    arr_cpu = np.arange(50, dtype=np.float64).reshape(5, 5, 2)
    arr_gpu = gpuarray.to_gpu(arr_cpu)

    periodicity_stencil = [(1, 0), (-1, 0), (1, 1)]
    periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2)
    periodic_cpu_kernel = periodic_cpu(periodicity_stencil)

    cpu_result = np.copy(arr_cpu)
    periodic_cpu_kernel(cpu_result)

    gpu_result = np.copy(arr_cpu)
    periodic_gpu_kernel(pdfs=arr_gpu)
    arr_gpu.get(gpu_result)
    np.testing.assert_equal(cpu_result, gpu_result)
148
149
150
151
152


def test_block_size_limiting():
    res = BlockIndexing.limit_block_size_to_device_maximum((4096, 4096, 4096))
    assert all(r < 4096 for r in res)
153
154
155
156
157
158
159
160
161
162


def test_block_indexing():
    f = fields("f: [3D]")
    bi = BlockIndexing(f, make_slice[:, :, :], block_size=(16, 8, 2), permute_block_size_dependent_on_layout=False)
    assert bi.call_parameters((3, 2, 32))['block'] == (3, 2, 32)
    assert bi.call_parameters((32, 2, 32))['block'] == (16, 2, 8)

    bi = BlockIndexing(f, make_slice[:, :, :], block_size=(32, 1, 1), permute_block_size_dependent_on_layout=False)
    assert bi.call_parameters((1, 16, 16))['block'] == (1, 16, 2)