UniformGridGPU.py 6.23 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
import sympy as sp
2
import numpy as np
3
import pystencils as ps
4

5
6
from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
7
8
9
10
11
12
13
14
15
16

from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil

from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary
Martin Bauer's avatar
Martin Bauer committed
17

18
# Symbolic relaxation rates that the collision setups in options_dict refer to.
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
20
21
22
23
24
25
26
# Selects how the GPU block size of the sweeps is specified: either as fixed
# integers baked into the generated code, or as the int32 symbols
# cudaBlockSize0/1/2 that stay open in the generated kernels.
compile_time_block_size = False

if compile_time_block_size:
    sweep_block_size = (128, 1, 1)
else:
    sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
                        TypedSymbol("cudaBlockSize1", np.int32),
                        TypedSymbol("cudaBlockSize2", np.int32))

# Passed on through the 'optimization' options as 'gpu_indexing_params'.
gpu_indexing_params = {'block_size': sweep_block_size}
Martin Bauer's avatar
Martin Bauer committed
30

31
# Keyword arguments for create_lb_collision_rule, keyed by the collision-setup
# token of the configuration name. The entry that is selected gets extended
# with the stencil/field/optimization settings before the call.
options_dict = {
    'srt': {
        'method': 'srt',
        'relaxation_rate': omega,
        'compressible': False,
    },
    'trt': {
        'method': 'trt',
        'relaxation_rate': omega,
    },
    'mrt': {
        'method': 'mrt',
        'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
    },
    'mrt-overrelax': {
        'method': 'mrt',
        'relaxation_rates': [omega, 1.3, 1.4, omega, 1.2, 1.1],
    },
    'cumulant': {
        'method': 'cumulant',
        'relaxation_rate': omega,
        'compressible': True,
    },
    'cumulant-overrelax': {
        'method': 'cumulant',
        # omega followed by ten fixed rates 1.01, 1.02, ..., 1.10
        'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
        'compressible': True,
    },
    'entropic': {
        'method': 'mrt',
        'compressible': True,
        'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free],
        'entropic': True,
    },
    'smagorinsky': {
        'method': 'srt',
        'smagorinsky': True,
        'relaxation_rate': omega,
    }
}

72
73
# C++ snippet passed as additional code to the generated info header. The
# placeholders are filled in with str.format; the two CSE flags are expected
# as integers so that they render as valid C++ bool initialisers.
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""

80
81
# DEFAULTS
# When True, insert_fast_divisions / insert_fast_sqrts are applied to the
# collision rule. Overridden below when the configuration name has a fourth
# token: 'noopt' turns it off, any other value turns it on.
optimize = True
82

83
with CodeGeneration() as ctx:
    # ctx.config is split at '_' into
    # "<stencil>_<streaming pattern>_<collision setup>[_<optimization flag>]".
    config_tokens = ctx.config.split('_')

    assert len(config_tokens) >= 3, \
        f"Expected at least three '_'-separated config tokens, got: {ctx.config}"
    stencil_str = config_tokens[0]
    streaming_pattern = config_tokens[1]
    collision_setup = config_tokens[2]

    # Optional fourth token: 'noopt' disables the fast-approximation pass,
    # anything else enables it.
    if len(config_tokens) >= 4:
        optimize = (config_tokens[3] != 'noopt')

    stencil = get_stencil(stencil_str)
    assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
    # Fail with a readable message instead of a bare KeyError on the lookup below.
    assert collision_setup in options_dict, f"Invalid collision setup: {collision_setup}"

    options = options_dict[collision_setup]

    q = len(stencil)  # number of stencil directions
    dim = len(stencil[0])  # number of components per direction
    assert dim == 3, "This app supports only three-dimensional stencils"
    pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : double[3D]", layout='fzyx')
103
104

    # Settings that are identical for every collision setup.
    common_options = {
        'stencil': stencil,
        'field_name': pdfs.name,
        'optimization': {
            'target': 'gpu',
            'cse_global': True,
            'cse_pdfs': False,
            'symbolic_field': pdfs,
            'field_layout': 'fzyx',
            'gpu_indexing_params': gpu_indexing_params,
        }
    }

    # dict.update copies references only, so options['optimization'] is the
    # very same dict object as common_options['optimization'].
    options.update(common_options)
118

119
120
121
122
123
    # In-place streaming patterns work on the pdf field alone; every other
    # pattern additionally needs the temporary field, which is swapped with
    # the pdf field.
    if is_inplace(streaming_pattern):
        field_swaps = []
    else:
        options['optimization']['symbolic_temporary_field'] = pdfs_tmp
        field_swaps = [(pdfs, pdfs_tmp)]
124

125
126
    # (type, name) pairs handed to the LB sweep generator as
    # varying_parameters; the names are those of the cudaBlockSize0/1/2
    # block-size symbols.
    vp = [
        ('int32_t', 'cudaBlockSize0'),
        ('int32_t', 'cudaBlockSize1'),
        ('int32_t', 'cudaBlockSize2')
    ]
130
131
132
133
134
135
136
137
138
139
140
141
142

    # LB sweep: derive the collision rule from the selected options, optionally
    # post-process it, then generate the alternating sweep class.
    collision_rule = create_lb_collision_rule(**options)
    if optimize:
        # Divisions are replaced first, square roots second.
        collision_rule = insert_fast_sqrts(insert_fast_divisions(collision_rule))

    lb_method = collision_rule.method

    generate_alternating_lbm_sweep(
        ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
        optimization=options['optimization'],
        inner_outer_split=True,
        varying_parameters=vp,
        field_swaps=field_swaps,
    )
Martin Bauer's avatar
Martin Bauer committed
143

144
    # Setter kernel for the macroscopic values (no getter is generated here)
145
146
147
148
149
150
151
152
153
154
155
156
157
158
    # Kernel that writes the pdf field from a constant density of 1.0 and the
    # velocity read from velocity_field.
    setter_assignments = macroscopic_values_setter(
        lb_method,
        density=1.0,
        velocity=velocity_field.center_vector,
        pdfs=pdfs,
        streaming_pattern=streaming_pattern,
        previous_timestep=Timestep.EVEN,
    )
    generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')

    # Boundary conditions: one generated class per boundary object, both bound
    # to the pdf field and the selected streaming pattern.
    noslip = NoSlip()
    generate_alternating_lbm_boundary(
        ctx, 'UniformGridGPU_NoSlip', noslip, lb_method,
        field_name=pdfs.name,
        streaming_pattern=streaming_pattern,
        target='gpu',
    )

    # UBB constructed with the fixed vector (0.05, 0, 0).
    ubb = UBB((0.05, 0, 0))
    generate_alternating_lbm_boundary(
        ctx, 'UniformGridGPU_UBB', ubb, lb_method,
        field_name=pdfs.name,
        streaming_pattern=streaming_pattern,
        target='gpu',
    )
159

Martin Bauer's avatar
Martin Bauer committed
160
    # communication
161
162
163
    # Pack info for the pdf field, generated for the selected streaming pattern.
    generate_lb_pack_info(
        ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
        streaming_pattern=streaming_pattern,
        target='gpu',
        always_generate_separate_classes=True,
    )
164
165
166

    # Values substituted into the info_header template.
    infoHeaderParams = {
        'stencil': stencil_str,
        'streaming_pattern': streaming_pattern,
        'collision_setup': collision_setup,
        # int() so the flags are formatted as 1/0 rather than True/False
        'cse_global': int(options['optimization']['cse_global']),
        'cse_pdfs': int(options['optimization']['cse_pdfs']),
    }
172
173
174
175
176
177
178
179
180
181

    # Both stencil typedefs refer to the same stencil.
    stencil_typedefs = dict.fromkeys(('Stencil_T', 'CommunicationStencil_T'), stencil)
    field_typedefs = {
        'PdfField_T': pdfs,
        'VelocityField_T': velocity_field,
    }

    # Info header: stencil/field typedefs plus the filled-in info_header snippet
    generate_info_header(
        ctx, 'UniformGridGPU_InfoHeader',
        stencil_typedefs=stencil_typedefs,
        field_typedefs=field_typedefs,
        additional_code=info_header.format(**infoHeaderParams),
    )