Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Florian Weik
waLBerla
Commits
00570bba
Commit
00570bba
authored
Jan 09, 2019
by
Martin Bauer
Browse files
Memory leak bugfix - regenerated files
parent
77aed6a0
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
View file @
00570bba
...
...
@@ -129,7 +129,6 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const
double
vel1Term
=
_data_pdfs_20_31_1m1
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_37_1m1
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_315_1m1
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_2m1_311_1m1
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel2Term
=
_data_pdfs_2m1_312_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_2m1_313_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_2m1_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_32_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_20_39_11
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_316_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_21_317_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_36_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_20_37_1m1
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_2m1_313_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
];
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi_20
-
_data_pdfs_20_310_11
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_20_32_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_1m1
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_2m1_312_11
[
_stride_pdfs_0
*
ctr_0
];
...
...
@@ -146,9 +145,10 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -195,62 +195,80 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
xi_5
=
omega
*
0.0416666666666667
;
for
(
int
ctr_2
=
1
;
ctr_2
<
_size_pdfs_2
-
1
;
ctr_2
+=
1
)
{
double
*
_data_pdfs_20_317
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
17
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_39
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
9
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_316
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
16
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_310
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
10
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_312
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
12
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_31
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_3
;
double
*
_data_pdfs_20_314
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
14
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_318
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
18
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_34
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
4
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_38
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
8
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_31
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_3
;
double
*
_data_pdfs_20_311
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
11
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_317
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
17
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_315
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
15
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_37
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
7
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_312
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
12
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_316
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
16
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_311
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
11
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_39
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
9
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_32
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_313
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
13
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_37
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
7
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_310
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
10
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_34
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
4
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_35
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
5
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_30
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
;
double
*
_data_pdfs_20_32
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_33
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
3
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_38
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
8
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_30
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
;
double
*
_data_pdfs_20_36
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
6
*
_stride_pdfs_3
;
for
(
int
ctr_1
=
1
;
ctr_1
<
_size_pdfs_1
-
1
;
ctr_1
+=
1
)
{
double
*
_data_pdfs_20_317_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_317
;
double
*
_data_pdfs_20_39_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_39
;
double
*
_data_pdfs_20_316_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_316
;
double
*
_data_pdfs_20_310_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_310
;
double
*
_data_pdfs_20_312_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_312
;
double
*
_data_pdfs_20_31_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_31
;
double
*
_data_pdfs_20_314_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_314
;
double
*
_data_pdfs_20_318_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_318
;
double
*
_data_pdfs_20_34_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_34
;
double
*
_data_pdfs_20_38_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_38
;
double
*
_data_pdfs_20_31_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_31
;
double
*
_data_pdfs_20_311_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_311
;
double
*
_data_pdfs_20_317_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_317
;
double
*
_data_pdfs_20_315_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_315
;
double
*
_data_pdfs_20_37_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_37
;
double
*
_data_pdfs_20_312_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_312
;
double
*
_data_pdfs_20_316_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_316
;
double
*
_data_pdfs_20_311_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_311
;
double
*
_data_pdfs_20_39_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_39
;
double
*
_data_pdfs_20_32_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_32
;
double
*
_data_pdfs_20_313_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_313
;
double
*
_data_pdfs_20_37_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_37
;
double
*
_data_pdfs_20_310_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_310
;
double
*
_data_pdfs_20_34_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_34
;
double
*
_data_pdfs_20_35_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_35
;
double
*
_data_pdfs_20_30_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_30
;
double
*
_data_pdfs_20_32_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_32
;
double
*
_data_pdfs_20_33_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_33
;
double
*
_data_pdfs_20_38_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_38
;
double
*
_data_pdfs_20_30_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_30
;
double
*
_data_pdfs_20_36_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_36
;
for
(
int
ctr_0
=
1
;
ctr_0
<
_size_pdfs_0
-
1
;
ctr_0
+=
1
)
{
const
double
xi_18
=
-
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_19
=
-
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_20
=
-
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel0Term
=
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel1Term
=
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel2Term
=
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_18
=
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_19
=
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_20
=
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_21
=
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_22
=
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_23
=
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_24
=
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_25
=
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_26
=
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_27
=
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_28
=
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_29
=
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_30
=
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_31
=
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_32
=
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_33
=
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_34
=
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_35
=
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_36
=
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_18
=
-
Dummy_22
;
const
double
xi_19
=
-
Dummy_26
;
const
double
xi_20
=
-
Dummy_24
;
const
double
vel0Term
=
Dummy_20
+
Dummy_21
+
Dummy_30
+
Dummy_31
+
Dummy_34
;
const
double
vel1Term
=
Dummy_19
+
Dummy_23
+
Dummy_25
+
Dummy_29
;
const
double
vel2Term
=
Dummy_18
+
Dummy_28
+
Dummy_32
;
const
double
rho
=
Dummy_22
+
Dummy_24
+
Dummy_26
+
Dummy_27
+
Dummy_33
+
Dummy_35
+
Dummy_36
+
vel0Term
+
vel1Term
+
vel2Term
;
const
double
u_0
=
-
Dummy_28
-
Dummy_29
-
Dummy_33
+
vel0Term
+
xi_18
+
xi_19
;
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi
_2
0
-
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
;
const
double
u_1
=
-
Dummy_18
-
Dummy
_2
7
-
Dummy_30
+
Dummy_34
+
vel1Term
+
xi_19
+
xi_20
;
const
double
xi_21
=
-
u_1
;
const
double
xi_24
=
(
u_1
*
u_1
);
const
double
u_2
=
vel2Term
+
xi_18
+
xi
_2
0
+
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
;
const
double
u_2
=
Dummy_20
-
Dummy_21
-
Dummy
_2
3
+
Dummy_25
-
Dummy_36
+
vel2Term
+
xi_18
+
xi_20
;
const
double
xi_22
=
-
u_2
;
const
double
xi_25
=
(
u_2
*
u_2
);
const
double
u0Mu1
=
u_0
+
xi_21
;
...
...
@@ -261,9 +279,10 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -279,25 +298,25 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
xi_15
=
(
u0Mu2
*
u0Mu2
)
*
3
+
xi_30
;
const
double
xi_16
=
u0Pu2
*
2
;
const
double
xi_17
=
(
u0Pu2
*
u0Pu2
)
*
3
+
xi_30
;
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
=
omega
*
(
f_eq_common
*
0.333333333333333
-
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u
_1
+
xi_
2
-
6
*
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
xi_2
+
xi_21
-
6
*
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
-
u_0
+
xi_3
-
6
*
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u_0
+
xi_3
-
6
*
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u_2
+
xi_4
-
6
*
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
xi_22
+
xi_4
-
6
*
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_6
+
xi_7
-
24
*
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_8
+
xi_9
-
24
*
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_8
+
xi_9
-
24
*
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_6
+
xi_7
-
24
*
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_10
+
xi_11
-
24
*
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_12
+
xi_13
-
24
*
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_14
+
xi_15
-
24
*
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_16
+
xi_17
-
24
*
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_12
+
xi_13
-
24
*
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_10
+
xi_11
-
24
*
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_16
+
xi_17
-
24
*
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_14
+
xi_15
-
24
*
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_35
+
omega
*
(
-
Dummy_35
+
f_eq_common
*
0.333333333333333
)
;
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy
_1
9
+
xi_
1
*
(
Dummy_19
*-
6
+
u_1
+
xi_2
)
;
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_27
+
xi_1
*
(
Dummy_27
*-
6
+
xi_2
+
xi_21
)
;
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_33
+
xi_1
*
(
Dummy_33
*-
6
-
u_0
+
xi_3
)
;
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_31
+
xi_1
*
(
Dummy_31
*-
6
+
u_0
+
xi_3
)
;
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_32
+
xi_1
*
(
Dummy_32
*-
6
+
u_2
+
xi_4
)
;
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_36
+
xi_1
*
(
Dummy_36
*-
6
+
xi_22
+
xi_4
)
;
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_29
+
xi_5
*
(
Dummy_29
*-
24
-
xi_6
+
xi_7
)
;
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_34
+
xi_5
*
(
Dummy_34
*-
24
+
xi_8
+
xi_9
)
;
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_26
+
xi_5
*
(
Dummy_26
*-
24
-
xi_8
+
xi_9
)
;
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_30
+
xi_5
*
(
Dummy_30
*-
24
+
xi_6
+
xi_7
)
;
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_25
+
xi_5
*
(
Dummy_25
*-
24
+
xi_10
+
xi_11
)
;
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_18
+
xi_5
*
(
Dummy_18
*-
24
-
xi_12
+
xi_13
)
;
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_28
+
xi_5
*
(
Dummy_28
*-
24
-
xi_14
+
xi_15
)
;
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_20
+
xi_5
*
(
Dummy_20
*-
24
+
xi_16
+
xi_17
)
;
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_23
+
xi_5
*
(
Dummy_23
*-
24
+
xi_12
+
xi_13
)
;
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_24
+
xi_5
*
(
Dummy_24
*-
24
-
xi_10
+
xi_11
)
;
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_22
+
xi_5
*
(
Dummy_22
*-
24
-
xi_16
+
xi_17
)
;
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_21
+
xi_5
*
(
Dummy_21
*-
24
+
xi_14
+
xi_15
)
;
}
}
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
View file @
00570bba
...
...
@@ -73,7 +73,6 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
double
*
const
_data_pdfs_11_20_32
=
_data_pdfs
+
_stride_pdfs_1
*
ctr_1
+
_stride_pdfs_1
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
const
_data_pdfs_10_21_36
=
_data_pdfs
+
_stride_pdfs_1
*
ctr_1
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_2
+
6
*
_stride_pdfs_3
;
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_10_20_30
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_10_20_33
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_10_21_317
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_10_21_36
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_11_20_32
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_11_20_39
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_11_21_316
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_10_20_33
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_10_2m1_313
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_1m1_20_37
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
];
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi_20
-
_data_pdfs_11_20_310
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_11_20_32
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_11_2m1_312
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_1m1_20_38
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
];
...
...
@@ -90,9 +89,10 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -183,7 +183,7 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
pdfs
->
swapDataPointers
(
pdfs_tmp
);
...
...
@@ -212,12 +212,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
inner
.
expand
(
-
1
);
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
yMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
inner
.
xMin
()
-
1
,
inner
.
yMin
()
-
1
,
inner
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
inner
.
xMin
()
-
1
,
inner
.
yMin
()
-
1
,
inner
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
inner
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
inner
.
xSize
()
+
2
);
...
...
@@ -230,7 +230,7 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
}
...
...
@@ -284,13 +284,13 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
for
(
auto
&
ci
:
layers
)
{
parallelSection_
.
run
([
&
](
auto
s
)
{
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
ci
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
ci
.
xSize
()
+
2
);
...
...
@@ -303,7 +303,7 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
s
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
});
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.h
View file @
00570bba
...
...
@@ -51,6 +51,15 @@ public:
:
pdfsID
(
pdfsID_
),
omega
(
omega_
)
{};
~
UniformGridGPU_LbKernel
()
{
for
(
auto
p
:
cache_pdfs_
)
{
delete
p
;
}
}
void
operator
()
(
IBlock
*
block
,
cudaStream_t
stream
=
0
);
void
inner
(
IBlock
*
block
,
cudaStream_t
stream
=
0
);
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
View file @
00570bba
...
...
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs_
m3B5BEDEA5094B12F
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
2227275638DDD757
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
m3B5BEDEA5094B12F
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20_m
2227275638DDD757
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs_
1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20_m
7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -97,7 +97,7 @@ void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaS
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
internal_boundary_UniformGridGPU_NoSlip
::
boundary_UniformGridGPU_NoSlip
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_indexVector
,
_data_pdfs
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
indexVectorSize
);
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
View file @
00570bba
This diff is collapsed.
Click to expand it.
apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
View file @
00570bba
...
...
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs_
m3B5BEDEA5094B12F
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
2227275638DDD757
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
m3B5BEDEA5094B12F
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20_m
2227275638DDD757
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs_
1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20_m
7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -97,7 +97,7 @@ void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStre
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
internal_boundary_UniformGridGPU_UBB
::
boundary_UniformGridGPU_UBB
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_indexVector
,
_data_pdfs
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
indexVectorSize
);
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment