Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import numpy as np
import sympy as sp
from pystencils import Field, Assignment
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.gpucuda.indexing import LineIndexing
from pystencils.slicing import remove_ghost_layers, add_ghost_layers, make_slice
from pystencils.gpucuda import make_python_function, create_cuda_kernel
import pycuda.gpuarray as gpuarray
from scipy.ndimage import convolve
def test_averaging_kernel():
size = (40, 55)
src_arr = np.random.rand(*size)
src_arr = add_ghost_layers(src_arr)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr)
dst_field = Field.create_from_numpy_array('dst', dst_arr)
update_rule = Assignment(dst_field[0, 0],
(src_field[0, 1] + src_field[0, -1] + src_field[1, 0] + src_field[-1, 0]) / 4)
ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
kernel = make_python_function(ast)
gpu_src_arr = gpuarray.to_gpu(src_arr)
gpu_dst_arr = gpuarray.to_gpu(dst_arr)
kernel(src=gpu_src_arr, dst=gpu_dst_arr)
gpu_dst_arr.get(dst_arr)
stencil = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4.0
reference = convolve(remove_ghost_layers(src_arr), stencil, mode='constant', cval=0.0)
reference = add_ghost_layers(reference)
np.testing.assert_almost_equal(reference, dst_arr)
def test_variable_sized_fields():
src_field = Field.create_generic('src', spatial_dimensions=2)
dst_field = Field.create_generic('dst', spatial_dimensions=2)
update_rule = Assignment(dst_field[0, 0],
(src_field[0, 1] + src_field[0, -1] + src_field[1, 0] + src_field[-1, 0]) / 4)
ast = create_cuda_kernel(sympy_cse_on_assignment_list([update_rule]))
kernel = make_python_function(ast)
size = (3, 3)
src_arr = np.random.rand(*size)
src_arr = add_ghost_layers(src_arr)
dst_arr = np.zeros_like(src_arr)
gpu_src_arr = gpuarray.to_gpu(src_arr)
gpu_dst_arr = gpuarray.to_gpu(dst_arr)
kernel(src=gpu_src_arr, dst=gpu_dst_arr)
gpu_dst_arr.get(dst_arr)
stencil = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4.0
reference = convolve(remove_ghost_layers(src_arr), stencil, mode='constant', cval=0.0)
reference = add_ghost_layers(reference)
np.testing.assert_almost_equal(reference, dst_arr)
def test_multiple_index_dimensions():
"""Sums along the last axis of a numpy array"""
src_size = (7, 6, 4)
dst_size = src_size[:2]
src_arr = np.asfortranarray(np.random.rand(*src_size))
dst_arr = np.zeros(dst_size)
src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=1)
dst_field = Field.create_from_numpy_array('dst', dst_arr, index_dimensions=0)
offset = (-2, -1)
update_rule = Assignment(dst_field[0, 0],
sum([src_field[offset[0], offset[1]](i) for i in range(src_size[-1])]))
ast = create_cuda_kernel([update_rule])
kernel = make_python_function(ast)
gpu_src_arr = gpuarray.to_gpu(src_arr)
gpu_dst_arr = gpuarray.to_gpu(dst_arr)
kernel(src=gpu_src_arr, dst=gpu_dst_arr)
gpu_dst_arr.get(dst_arr)
reference = np.zeros_like(dst_arr)
gl = np.max(np.abs(np.array(offset, dtype=int)))
for x in range(gl, src_size[0]-gl):
for y in range(gl, src_size[1]-gl):
reference[x, y] = sum([src_arr[x+offset[0], y+offset[1], i] for i in range(src_size[2])])
np.testing.assert_almost_equal(reference, dst_arr)
def test_ghost_layer():
size = (6, 5)
src_arr = np.ones(size)
dst_arr = np.zeros_like(src_arr)
src_field = Field.create_from_numpy_array('src', src_arr, index_dimensions=0)
dst_field = Field.create_from_numpy_array('dst', dst_arr, index_dimensions=0)
update_rule = Assignment(dst_field[0, 0], src_field[0, 0])
ghost_layers = [(1, 2), (2, 1)]
ast = create_cuda_kernel([update_rule], ghost_layers=ghost_layers, indexing_creator=LineIndexing)
kernel = make_python_function(ast)
gpu_src_arr = gpuarray.to_gpu(src_arr)
gpu_dst_arr = gpuarray.to_gpu(dst_arr)
kernel(src=gpu_src_arr, dst=gpu_dst_arr)
gpu_dst_arr.get(dst_arr)
reference = np.zeros_like(src_arr)
reference[ghost_layers[0][0]:-ghost_layers[0][1], ghost_layers[1][0]:-ghost_layers[1][1]] = 1
np.testing.assert_equal(reference, dst_arr)
def test_setting_value():
arr_cpu = np.arange(25, dtype=np.float64).reshape(5, 5)
arr_gpu = gpuarray.to_gpu(arr_cpu)
iteration_slice = make_slice[:, :]
f = Field.create_generic("f", 2)
update_rule = [Assignment(f(0), sp.Symbol("value"))]
ast = create_cuda_kernel(update_rule, iteration_slice=iteration_slice, indexing_creator=LineIndexing)
kernel = make_python_function(ast)
kernel(f=arr_gpu, value=np.float64(42.0))
np.testing.assert_equal(arr_gpu.get(), np.ones((5, 5)) * 42.0)
def test_periodicity():
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as periodic_gpu
from pystencils.slicing import get_periodic_boundary_functor as periodic_cpu
arr_cpu = np.arange(50, dtype=np.float64).reshape(5, 5, 2)
arr_gpu = gpuarray.to_gpu(arr_cpu)
periodicity_stencil = [(1, 0), (-1, 0), (1, 1)]
periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2)
periodic_cpu_kernel = periodic_cpu(periodicity_stencil)
cpu_result = np.copy(arr_cpu)
periodic_cpu_kernel(cpu_result)
gpu_result = np.copy(arr_cpu)
periodic_gpu_kernel(pdfs=arr_gpu)
arr_gpu.get(gpu_result)
np.testing.assert_equal(cpu_result, gpu_result)