communication.py 9.65 KB
Newer Older
1
2
3
4
from pystencils import Field, Assignment
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
    numeric_offsets, Timestep, get_timesteps
5
6
from pystencils.datahandling import SerialDataHandling
from itertools import chain
Frederik Hennig's avatar
Frederik Hennig committed
7

Frederik Hennig's avatar
Frederik Hennig committed
8

Frederik Hennig's avatar
Frederik Hennig committed
9
def _trim_slice_in_direction(slices, direction):
Frederik Hennig's avatar
Frederik Hennig committed
10
    assert len(slices) == len(direction)
Frederik Hennig's avatar
Frederik Hennig committed
11
12
13

    result = []
    for s, d in zip(slices, direction):
14
15
16
        if isinstance(s, int):
            result.append(s)
            continue
Frederik Hennig's avatar
Frederik Hennig committed
17
18
19
20
21
22
23
        start = s.start + 1 if d == -1 else s.start
        stop = s.stop - 1 if d == 1 else s.stop
        result.append(slice(start, stop, s.step))

    return tuple(result)


Frederik Hennig's avatar
Frederik Hennig committed
24
def _extend_dir(direction):
Frederik Hennig's avatar
Frederik Hennig committed
25
26
27
28
    if len(direction) == 0:
        yield tuple()
    elif direction[0] == 0:
        for d in [-1, 0, 1]:
Frederik Hennig's avatar
Frederik Hennig committed
29
            for rest in _extend_dir(direction[1:]):
Frederik Hennig's avatar
Frederik Hennig committed
30
31
                yield (d, ) + rest
    else:
Frederik Hennig's avatar
Frederik Hennig committed
32
        for rest in _extend_dir(direction[1:]):
Frederik Hennig's avatar
Frederik Hennig committed
33
34
35
36
37
38
39
            yield (direction[0], ) + rest


def _get_neighbour_transform(direction, ghost_layers):
    return tuple(d * (ghost_layers + 1) for d in direction)


Frederik Hennig's avatar
Frederik Hennig committed
40
def _fix_length_one_slices(slices):
41
42
43
44
45
46
47
    """Slices of length one are replaced by their start value for correct periodic shifting"""
    if isinstance(slices, int):
        return slices
    elif isinstance(slices, slice):
        if slices.stop is not None and abs(slices.start - slices.stop) == 1:
            return slices.start
        elif slices.stop is None and slices.start == -1:
Frederik Hennig's avatar
Frederik Hennig committed
48
            return -1  # [-1:] also has length one
Frederik Hennig's avatar
Frederik Hennig committed
49
        else:
50
51
52
            return slices
    else:
        return tuple(_fix_length_one_slices(s) for s in slices)
Frederik Hennig's avatar
Frederik Hennig committed
53
54


Frederik Hennig's avatar
Frederik Hennig committed
55
def get_communication_slices(
Frederik Hennig's avatar
Frederik Hennig committed
56
        stencil, comm_stencil=None, streaming_pattern='pull', prev_timestep=Timestep.BOTH, ghost_layers=1):
Frederik Hennig's avatar
Frederik Hennig committed
57
    """
58
    Return the source and destination slices for periodicity handling or communication between blocks.
Frederik Hennig's avatar
Frederik Hennig committed
59

60
61
    :param stencil: The stencil used by the LB method.
    :param comm_stencil: The stencil defining the communication directions. If None, it will be set to stencil.
62
    :param streaming_pattern: The streaming pattern.
Frederik Hennig's avatar
Frederik Hennig committed
63
    :param prev_timestep: Timestep after which communication is run.
Frederik Hennig's avatar
Frederik Hennig committed
64
65
66
    :param ghost_layers: Number of ghost layers in each direction.

    """
67
68
69
    if comm_stencil is None:
        comm_stencil = stencil

Frederik Hennig's avatar
Frederik Hennig committed
70
    pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),))
Frederik Hennig's avatar
Frederik Hennig committed
71
    write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
72
    slices_per_comm_direction = dict()
Frederik Hennig's avatar
Frederik Hennig committed
73

74
    for comm_dir in comm_stencil:
Frederik Hennig's avatar
Frederik Hennig committed
75
76
77
        if all(d == 0 for d in comm_dir):
            continue

78
        slices_for_dir = []
Frederik Hennig's avatar
Frederik Hennig committed
79

Frederik Hennig's avatar
Frederik Hennig committed
80
        for streaming_dir in set(_extend_dir(comm_dir)) & set(stencil):
Frederik Hennig's avatar
Frederik Hennig committed
81
            d = stencil.index(streaming_dir)
Frederik Hennig's avatar
Frederik Hennig committed
82
83
            write_offsets = numeric_offsets(write_accesses[d])
            write_index = numeric_index(write_accesses[d])[0]
Frederik Hennig's avatar
Frederik Hennig committed
84

Frederik Hennig's avatar
Frederik Hennig committed
85
86
            tangential_dir = tuple(s - c for s, c in zip(streaming_dir, comm_dir))
            origin_slice = get_slice_before_ghost_layer(comm_dir, ghost_layers=ghost_layers, thickness=1)
Frederik Hennig's avatar
Frederik Hennig committed
87
            origin_slice = _fix_length_one_slices(origin_slice)
Frederik Hennig's avatar
Frederik Hennig committed
88
            src_slice = shift_slice(_trim_slice_in_direction(origin_slice, tangential_dir), write_offsets)
Frederik Hennig's avatar
Frederik Hennig committed
89

Frederik Hennig's avatar
Frederik Hennig committed
90
91
            neighbour_transform = _get_neighbour_transform(comm_dir, ghost_layers)
            dst_slice = shift_slice(src_slice, neighbour_transform)
Frederik Hennig's avatar
Frederik Hennig committed
92

93
94
95
96
            src_slice = src_slice + (write_index, )
            dst_slice = dst_slice + (write_index, )

            slices_for_dir.append((src_slice, dst_slice))
Frederik Hennig's avatar
Frederik Hennig committed
97

98
99
        slices_per_comm_direction[comm_dir] = slices_for_dir
    return slices_per_comm_direction
100
101


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def periodic_pdf_copy_kernel(pdf_field, src_slice, dst_slice,
                             domain_size=None, target='gpu',
                             opencl_queue=None, opencl_ctx=None):
    """Copies a rectangular array slice onto another non-overlapping array slice"""
    from pystencils.gpucuda.kernelcreation import create_cuda_kernel

    pdf_idx = src_slice[-1]
    assert isinstance(pdf_idx, int), "PDF index needs to be an integer constant"
    assert pdf_idx == dst_slice[-1], "Source and Destination PDF indices must be equal"
    src_slice = src_slice[:-1]
    dst_slice = dst_slice[:-1]

    if domain_size is None:
        domain_size = pdf_field.spatial_shape

    normalized_from_slice = normalize_slice(src_slice, domain_size)
    normalized_to_slice = normalize_slice(dst_slice, domain_size)

    def _start(s):
        return s.start if isinstance(s, slice) else s

    def _stop(s):
        return s.stop if isinstance(s, slice) else s

    offset = [_start(s1) - _start(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)]
    assert offset == [_stop(s1) - _stop(s2) for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
        "Slices have to have same size"

    copy_eq = Assignment(pdf_field(pdf_idx), pdf_field[tuple(offset)](pdf_idx))
    ast = create_cuda_kernel([copy_eq], iteration_slice=dst_slice, skip_independence_check=True)
    if target == 'gpu':
        from pystencils.gpucuda import make_python_function
        return make_python_function(ast)
    elif target == 'opencl':
        from pystencils.opencl import make_python_function
        return make_python_function(ast, opencl_queue, opencl_ctx)
    else:
        raise ValueError('Invalid target:', target)


142
143
class PeriodicityHandling:

144
    def __init__(self, stencil, data_handling, pdf_field_name,
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
                 streaming_pattern='pull', ghost_layers=1, target='cpu',
                 opencl_queue=None, opencl_ctx=None,
                 pycuda_direct_copy=True):
        """
            Periodicity Handling for Lattice Boltzmann Streaming.

            **On the usage with cuda/opencl:** 
            - pycuda allows the copying of sliced arrays within device memory using the numpy syntax,
            e.g. `dst[:,0] = src[:,-1]`. In this implementation, this is the default for periodicity
            handling. Alternatively, if you set `pycuda_direct_copy=False`, GPU kernels are generated and
            compiled. The compiled kernels are almost twice as fast in execution as pycuda array copying,
            but especially for large stencils like D3Q27, their compilation can take up to 20 seconds. 
            Choose your weapon depending on your use case.

            - pyopencl does not support copying of non-contiguous sliced arrays, so the usage of compiled
            copy kernels is forced on us. On the positive side, compilation of the OpenCL kernels appears
            to be about four times faster.
        """
163
        if not isinstance(data_handling, SerialDataHandling):
164
            raise ValueError('Only serial data handling is supported!')
165

166
167
        assert target in ['cpu', 'gpu', 'opencl']

168
169
170
        self.stencil = stencil
        self.dh = data_handling
        self.pdf_field_name = pdf_field_name
171
        self.ghost_layers = ghost_layers
172
173
        periodicity = data_handling.periodicity
        self.inplace_pattern = is_inplace(streaming_pattern)
174
175
176
177
178
        self.target = target
        self.cpu = target == 'cpu'
        self.opencl_queue = opencl_queue
        self.opencl_ctx = opencl_ctx
        self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy
179
180
181
182
183
184
185
186
187

        def is_copy_direction(direction):
            for d, p in zip(direction, periodicity):
                if d != 0 and not p:
                    return False

            return True

        copy_directions = tuple(filter(is_copy_direction, stencil[1:]))
Frederik Hennig's avatar
Frederik Hennig committed
188
        self.comm_slices = []
189
190
        timesteps = get_timesteps(streaming_pattern)
        for timestep in timesteps:
191
            slices_per_comm_dir = get_communication_slices(stencil=stencil,
192
193
194
195
                                                           comm_stencil=copy_directions,
                                                           streaming_pattern=streaming_pattern,
                                                           prev_timestep=timestep,
                                                           ghost_layers=ghost_layers)
Frederik Hennig's avatar
Frederik Hennig committed
196
            self.comm_slices.append(list(chain.from_iterable(v for k, v in slices_per_comm_dir.items())))
197

198
199
200
201
202
        if target == 'opencl' or (target == 'gpu' and not pycuda_direct_copy):
            self.device_copy_kernels = []
            for timestep in timesteps:
                self.device_copy_kernels.append(self._compile_copy_kernels(timestep))

Frederik Hennig's avatar
Frederik Hennig committed
203
    def __call__(self, prev_timestep=Timestep.BOTH):
204
        if self.cpu:
Frederik Hennig's avatar
Frederik Hennig committed
205
            self._periodicity_handling_cpu(prev_timestep)
206
207
        else:
            self._periodicity_handling_gpu(prev_timestep)
208

Frederik Hennig's avatar
Frederik Hennig committed
209
    def _periodicity_handling_cpu(self, prev_timestep):
210
        arr = self.dh.cpu_arrays[self.pdf_field_name]
Frederik Hennig's avatar
Frederik Hennig committed
211
        comm_slices = self.comm_slices[prev_timestep.idx]
212
213
214
        for src, dst in comm_slices:
            arr[dst] = arr[src]

215
216
217
218
219
220
221
222
223
224
    def _compile_copy_kernels(self, timestep):
        pdf_field = self.dh.fields[self.pdf_field_name]
        kernels = []
        for src, dst in self.comm_slices[timestep.idx]:
            kernels.append(
                periodic_pdf_copy_kernel(
                    pdf_field, src, dst, target=self.target, 
                    opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
        return kernels

Frederik Hennig's avatar
Frederik Hennig committed
225
    def _periodicity_handling_gpu(self, prev_timestep):
226
227
228
229
230
231
232
233
        arr = self.dh.gpu_arrays[self.pdf_field_name]
        if self.pycuda_direct_copy:
            for src, dst in self.comm_slices[prev_timestep.idx]:
                arr[dst] = arr[src]
        else:
            kernel_args = {self.pdf_field_name: arr}
            for kernel in self.device_copy_kernels[prev_timestep.idx]:
                kernel(**kernel_args)