# UniformGridGPU.py
# Code generation script: emits the UniformGridGPU_* sweeps, boundaries, pack info and info header.
import sympy as sp
import numpy as np
import pystencils as ps

from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions

from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.updatekernels import create_stream_only_kernel
# NOTE(review): wildcard import kept unchanged; it presumably supplies
# CollideOnlyInplaceAccessor, which is not imported explicitly anywhere else.
from lbmpy.fieldaccess import *

from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary

# Symbolic relaxation rates referenced by the entries of `options_dict`.
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")

# True  -> fixed block size (128, 1, 1).
# False -> symbolic block size made of the int32 symbols cudaBlockSize0..2.
compile_time_block_size = False

if compile_time_block_size:
    sweep_block_size = (128, 1, 1)
else:
    sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
                        TypedSymbol("cudaBlockSize1", np.int32),
                        TypedSymbol("cudaBlockSize2", np.int32))

# Indexing parameters handed to the GPU kernel generation calls below.
gpu_indexing_params = {'block_size': sweep_block_size}

# Collision-rule keyword arguments, keyed by the collision setup token of the
# configuration string. The selected entry is later extended with the common
# options and passed to `create_lb_collision_rule`.
options_dict = {
    'srt': {
        'method': 'srt',
        'relaxation_rate': omega,
        'compressible': False,
    },
    'trt': {
        'method': 'trt',
        'relaxation_rate': omega,
    },
    'mrt': {
        'method': 'mrt',
        'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
    },
    'mrt-overrelax': {
        'method': 'mrt',
        'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
    },
    'cumulant': {
        'method': 'cumulant',
        'relaxation_rate': omega,
        'compressible': True,
    },
    'cumulant-overrelax': {
        'method': 'cumulant',
        'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
        'compressible': True,
    },
    'entropic': {
        'method': 'mrt',
        'compressible': True,
        'relaxation_rates': [omega, omega] + [omega_free] * 6,
        'entropic': True,
    },
    'smagorinsky': {
        'method': 'srt',
        'smagorinsky': True,
        'relaxation_rate': omega,
    },
}

# Template for the extra C++ code appended to the generated info header.
# The placeholders are filled via `str.format` with the selected configuration.
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""

# DEFAULTS
# May be switched off by a fourth configuration token equal to 'noopt'.
optimize = True

with CodeGeneration() as ctx:
    field_type = "float64" if ctx.double_accuracy else "float32"

    # The configuration string is split at '_' into
    #   <stencil>_<streaming pattern>_<collision setup>[_<optimization flag>]
    config_tokens = ctx.config.split('_')

    assert len(config_tokens) >= 3, \
        f"Invalid configuration '{ctx.config}': expected <stencil>_<streaming_pattern>_<collision_setup>"
    stencil_str = config_tokens[0]
    streaming_pattern = config_tokens[1]
    collision_setup = config_tokens[2]

    # An optional fourth token equal to 'noopt' disables the fast approximations.
    if len(config_tokens) >= 4:
        optimize = (config_tokens[3] != 'noopt')

    stencil = get_stencil(stencil_str)
    assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"

    # Same exception type as a plain dict lookup, but with an actionable message.
    if collision_setup not in options_dict:
        raise KeyError(f"Invalid collision setup: {collision_setup}; available: {sorted(options_dict)}")
    options = options_dict[collision_setup]

    q = len(stencil)
    dim = len(stencil[0])
    assert dim == 3, "This app supports only three-dimensional stencils"

    # Symbolic fields: q PDF entries per cell (source and temporary) and a 3-component velocity.
    pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : {field_type}[3D]",
                                               layout='fzyx')

    # Options that are identical for every collision setup.
    common_options = {
        'stencil': stencil,
        'field_name': pdfs.name,
        'optimization': {
            'target': 'gpu',
            'cse_global': True,
            'cse_pdfs': False,
            'symbolic_field': pdfs,
            'field_layout': 'fzyx',
            'gpu_indexing_params': gpu_indexing_params
        }
    }

    # Merge into a fresh dict so the module-level `options_dict` entry is not modified in place.
    options = {**options, **common_options}

    # Patterns that are not in-place get the temporary PDF field and a field swap.
    if not is_inplace(streaming_pattern):
        options['optimization']['symbolic_temporary_field'] = pdfs_tmp
        field_swaps = [(pdfs, pdfs_tmp)]
    else:
        field_swaps = []

    # Varying kernel parameters: (C type, name) pairs matching the block size symbol names.
    vp = [
        ('int32_t', 'cudaBlockSize0'),
        ('int32_t', 'cudaBlockSize1'),
        ('int32_t', 'cudaBlockSize2')
    ]

    # Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
    # is_inplace is set to False to ensure that the streaming is done with src and dst field.
    # If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
    accessor = CollideOnlyInplaceAccessor()
    accessor.is_inplace = False
    field_swaps_stream_only = [(pdfs, pdfs_tmp)]
    stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor)

    # LB Sweep
    collision_rule = create_lb_collision_rule(**options)

    # Fast divisions are inserted first, then fast square roots.
    if optimize:
        collision_rule = insert_fast_divisions(collision_rule)
        collision_rule = insert_fast_sqrts(collision_rule)

    # The method object is reused for the setter and the boundary generation below.
    lb_method = collision_rule.method

    generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
                                   optimization=options['optimization'],
                                   inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps)

    # getter & setter
    # The setter uses density 1.0 and takes the velocity from the velocity field.
    setter_assignments = macroscopic_values_setter(lb_method, density=1.0, velocity=velocity_field.center_vector,
                                                   pdfs=pdfs,
                                                   streaming_pattern=streaming_pattern,
                                                   previous_timestep=Timestep.EVEN)
    generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')

    # Stream only kernel
    generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
                   gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target='gpu')

    # Boundaries
    noslip = NoSlip()
    ubb = UBB((0.05, 0, 0))

    generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
                                      streaming_pattern=streaming_pattern, target='gpu')
    generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
                                      streaming_pattern=streaming_pattern, target='gpu')

    # communication
    generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
                          streaming_pattern=streaming_pattern, target='gpu',
                          always_generate_separate_classes=True)

    # Values substituted into the `info_header` template; the CSE flags are
    # converted to int so they are rendered as 0 / 1.
    info_header_params = {
        'stencil': stencil_str,
        'streaming_pattern': streaming_pattern,
        'collision_setup': collision_setup,
        'cse_global': int(options['optimization']['cse_global']),
        'cse_pdfs': int(options['optimization']['cse_pdfs']),
    }

    stencil_typedefs = {'Stencil_T': stencil,
                        'CommunicationStencil_T': stencil}
    field_typedefs = {'PdfField_T': pdfs,
                      'VelocityField_T': velocity_field}

    # Info header containing correct template definitions for stencil and field
    generate_info_header(ctx, 'UniformGridGPU_InfoHeader',
                         stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
                         additional_code=info_header.format(**info_header_params))