Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
waLBerla
waLBerla
Commits
00570bba
Commit
00570bba
authored
Jan 09, 2019
by
Martin Bauer
Browse files
Memory leak bugfix - regenerated files
parent
77aed6a0
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
View file @
00570bba
...
...
@@ -129,7 +129,6 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const
double
vel1Term
=
_data_pdfs_20_31_1m1
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_37_1m1
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_315_1m1
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_2m1_311_1m1
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel2Term
=
_data_pdfs_2m1_312_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_2m1_313_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_2m1_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_32_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_20_39_11
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_316_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_21_317_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_21_36_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_20_37_1m1
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_2m1_313_10
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
];
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi_20
-
_data_pdfs_20_310_11
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_20_32_11
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_1m1
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_2m1_312_11
[
_stride_pdfs_0
*
ctr_0
];
...
...
@@ -146,9 +145,10 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -195,62 +195,80 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
xi_5
=
omega
*
0.0416666666666667
;
for
(
int
ctr_2
=
1
;
ctr_2
<
_size_pdfs_2
-
1
;
ctr_2
+=
1
)
{
double
*
_data_pdfs_20_317
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
17
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_39
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
9
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_316
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
16
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_310
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
10
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_312
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
12
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_31
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_3
;
double
*
_data_pdfs_20_314
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
14
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_318
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
18
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_34
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
4
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_38
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
8
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_31
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_3
;
double
*
_data_pdfs_20_311
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
11
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_317
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
17
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_315
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
15
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_37
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
7
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_312
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
12
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_316
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
16
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_311
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
11
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_39
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
9
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_32
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_313
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
13
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_37
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
7
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_310
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
10
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_34
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
4
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_35
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
5
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_30
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
;
double
*
_data_pdfs_20_32
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_33
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
3
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_38
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
8
*
_stride_pdfs_3
;
double
*
_data_pdfs_20_30
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
;
double
*
_data_pdfs_20_36
=
_data_pdfs
+
_stride_pdfs_2
*
ctr_2
+
6
*
_stride_pdfs_3
;
for
(
int
ctr_1
=
1
;
ctr_1
<
_size_pdfs_1
-
1
;
ctr_1
+=
1
)
{
double
*
_data_pdfs_20_317_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_317
;
double
*
_data_pdfs_20_39_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_39
;
double
*
_data_pdfs_20_316_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_316
;
double
*
_data_pdfs_20_310_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_310
;
double
*
_data_pdfs_20_312_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_312
;
double
*
_data_pdfs_20_31_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_31
;
double
*
_data_pdfs_20_314_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_314
;
double
*
_data_pdfs_20_318_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_318
;
double
*
_data_pdfs_20_34_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_34
;
double
*
_data_pdfs_20_38_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_38
;
double
*
_data_pdfs_20_31_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_31
;
double
*
_data_pdfs_20_311_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_311
;
double
*
_data_pdfs_20_317_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_317
;
double
*
_data_pdfs_20_315_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_315
;
double
*
_data_pdfs_20_37_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_37
;
double
*
_data_pdfs_20_312_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_312
;
double
*
_data_pdfs_20_316_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_316
;
double
*
_data_pdfs_20_311_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_311
;
double
*
_data_pdfs_20_39_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_39
;
double
*
_data_pdfs_20_32_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_32
;
double
*
_data_pdfs_20_313_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_313
;
double
*
_data_pdfs_20_37_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_37
;
double
*
_data_pdfs_20_310_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_310
;
double
*
_data_pdfs_20_34_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_34
;
double
*
_data_pdfs_20_35_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_35
;
double
*
_data_pdfs_20_30_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_30
;
double
*
_data_pdfs_20_32_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_32
;
double
*
_data_pdfs_20_33_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_33
;
double
*
_data_pdfs_20_38_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_38
;
double
*
_data_pdfs_20_30_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_30
;
double
*
_data_pdfs_20_36_10
=
_stride_pdfs_1
*
ctr_1
+
_data_pdfs_20_36
;
for
(
int
ctr_0
=
1
;
ctr_0
<
_size_pdfs_0
-
1
;
ctr_0
+=
1
)
{
const
double
xi_18
=
-
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_19
=
-
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_20
=
-
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel0Term
=
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel1Term
=
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
vel2Term
=
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_18
=
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_19
=
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_20
=
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_21
=
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_22
=
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_23
=
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_24
=
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_25
=
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_26
=
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_27
=
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_28
=
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_29
=
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_30
=
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_31
=
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_32
=
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_33
=
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_34
=
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_35
=
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
Dummy_36
=
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_18
=
-
Dummy_22
;
const
double
xi_19
=
-
Dummy_26
;
const
double
xi_20
=
-
Dummy_24
;
const
double
vel0Term
=
Dummy_20
+
Dummy_21
+
Dummy_30
+
Dummy_31
+
Dummy_34
;
const
double
vel1Term
=
Dummy_19
+
Dummy_23
+
Dummy_25
+
Dummy_29
;
const
double
vel2Term
=
Dummy_18
+
Dummy_28
+
Dummy_32
;
const
double
rho
=
Dummy_22
+
Dummy_24
+
Dummy_26
+
Dummy_27
+
Dummy_33
+
Dummy_35
+
Dummy_36
+
vel0Term
+
vel1Term
+
vel2Term
;
const
double
u_0
=
-
Dummy_28
-
Dummy_29
-
Dummy_33
+
vel0Term
+
xi_18
+
xi_19
;
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi
_2
0
-
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
;
const
double
u_1
=
-
Dummy_18
-
Dummy
_2
7
-
Dummy_30
+
Dummy_34
+
vel1Term
+
xi_19
+
xi_20
;
const
double
xi_21
=
-
u_1
;
const
double
xi_24
=
(
u_1
*
u_1
);
const
double
u_2
=
vel2Term
+
xi_18
+
xi
_2
0
+
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
;
const
double
u_2
=
Dummy_20
-
Dummy_21
-
Dummy
_2
3
+
Dummy_25
-
Dummy_36
+
vel2Term
+
xi_18
+
xi_20
;
const
double
xi_22
=
-
u_2
;
const
double
xi_25
=
(
u_2
*
u_2
);
const
double
u0Mu1
=
u_0
+
xi_21
;
...
...
@@ -261,9 +279,10 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -279,25 +298,25 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const
double
xi_15
=
(
u0Mu2
*
u0Mu2
)
*
3
+
xi_30
;
const
double
xi_16
=
u0Pu2
*
2
;
const
double
xi_17
=
(
u0Pu2
*
u0Pu2
)
*
3
+
xi_30
;
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
=
omega
*
(
f_eq_common
*
0.333333333333333
-
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u
_1
+
xi_
2
-
6
*
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
xi_2
+
xi_21
-
6
*
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
-
u_0
+
xi_3
-
6
*
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u_0
+
xi_3
-
6
*
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
u_2
+
xi_4
-
6
*
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_1
*
(
xi_22
+
xi_4
-
6
*
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_6
+
xi_7
-
24
*
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_8
+
xi_9
-
24
*
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_8
+
xi_9
-
24
*
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_6
+
xi_7
-
24
*
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_10
+
xi_11
-
24
*
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_12
+
xi_13
-
24
*
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_14
+
xi_15
-
24
*
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_16
+
xi_17
-
24
*
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_12
+
xi_13
-
24
*
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_10
+
xi_11
-
24
*
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
-
xi_16
+
xi_17
-
24
*
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
=
xi_5
*
(
xi_14
+
xi_15
-
24
*
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
])
+
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
;
_data_pdfs_20_30_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_35
+
omega
*
(
-
Dummy_35
+
f_eq_common
*
0.333333333333333
)
;
_data_pdfs_20_31_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy
_1
9
+
xi_
1
*
(
Dummy_19
*-
6
+
u_1
+
xi_2
)
;
_data_pdfs_20_32_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_27
+
xi_1
*
(
Dummy_27
*-
6
+
xi_2
+
xi_21
)
;
_data_pdfs_20_33_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_33
+
xi_1
*
(
Dummy_33
*-
6
-
u_0
+
xi_3
)
;
_data_pdfs_20_34_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_31
+
xi_1
*
(
Dummy_31
*-
6
+
u_0
+
xi_3
)
;
_data_pdfs_20_35_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_32
+
xi_1
*
(
Dummy_32
*-
6
+
u_2
+
xi_4
)
;
_data_pdfs_20_36_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_36
+
xi_1
*
(
Dummy_36
*-
6
+
xi_22
+
xi_4
)
;
_data_pdfs_20_37_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_29
+
xi_5
*
(
Dummy_29
*-
24
-
xi_6
+
xi_7
)
;
_data_pdfs_20_38_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_34
+
xi_5
*
(
Dummy_34
*-
24
+
xi_8
+
xi_9
)
;
_data_pdfs_20_39_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_26
+
xi_5
*
(
Dummy_26
*-
24
-
xi_8
+
xi_9
)
;
_data_pdfs_20_310_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_30
+
xi_5
*
(
Dummy_30
*-
24
+
xi_6
+
xi_7
)
;
_data_pdfs_20_311_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_25
+
xi_5
*
(
Dummy_25
*-
24
+
xi_10
+
xi_11
)
;
_data_pdfs_20_312_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_18
+
xi_5
*
(
Dummy_18
*-
24
-
xi_12
+
xi_13
)
;
_data_pdfs_20_313_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_28
+
xi_5
*
(
Dummy_28
*-
24
-
xi_14
+
xi_15
)
;
_data_pdfs_20_314_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_20
+
xi_5
*
(
Dummy_20
*-
24
+
xi_16
+
xi_17
)
;
_data_pdfs_20_315_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_23
+
xi_5
*
(
Dummy_23
*-
24
+
xi_12
+
xi_13
)
;
_data_pdfs_20_316_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_24
+
xi_5
*
(
Dummy_24
*-
24
-
xi_10
+
xi_11
)
;
_data_pdfs_20_317_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_22
+
xi_5
*
(
Dummy_22
*-
24
-
xi_16
+
xi_17
)
;
_data_pdfs_20_318_10
[
_stride_pdfs_0
*
ctr_0
]
=
Dummy_21
+
xi_5
*
(
Dummy_21
*-
24
+
xi_14
+
xi_15
)
;
}
}
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
View file @
00570bba
...
...
@@ -73,7 +73,6 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
double
*
const
_data_pdfs_11_20_32
=
_data_pdfs
+
_stride_pdfs_1
*
ctr_1
+
_stride_pdfs_1
+
_stride_pdfs_2
*
ctr_2
+
2
*
_stride_pdfs_3
;
double
*
const
_data_pdfs_10_21_36
=
_data_pdfs
+
_stride_pdfs_1
*
ctr_1
+
_stride_pdfs_2
*
ctr_2
+
_stride_pdfs_2
+
6
*
_stride_pdfs_3
;
const
double
rho
=
vel0Term
+
vel1Term
+
vel2Term
+
_data_pdfs_10_20_30
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_10_20_33
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_10_21_317
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_10_21_36
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_11_20_32
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_11_20_39
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
+
_data_pdfs_11_21_316
[
_stride_pdfs_0
*
ctr_0
];
const
double
xi_27
=
rho
*-
0.333333333333333
;
const
double
u_0
=
vel0Term
+
xi_18
+
xi_19
-
_data_pdfs_10_20_33
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_10_2m1_313
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
]
-
_data_pdfs_1m1_20_37
[
_stride_pdfs_0
*
ctr_0
+
_stride_pdfs_0
];
const
double
xi_23
=
(
u_0
*
u_0
);
const
double
u_1
=
vel1Term
+
xi_19
+
xi_20
-
_data_pdfs_11_20_310
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
]
-
_data_pdfs_11_20_32
[
_stride_pdfs_0
*
ctr_0
]
-
_data_pdfs_11_2m1_312
[
_stride_pdfs_0
*
ctr_0
]
+
_data_pdfs_1m1_20_38
[
_stride_pdfs_0
*
ctr_0
-
_stride_pdfs_0
];
...
...
@@ -90,9 +89,10 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
const
double
u0Pu2
=
u_0
+
u_2
;
const
double
f_eq_common
=
rho
-
xi_23
-
xi_24
-
xi_25
;
const
double
xi_26
=
f_eq_common
+
rho
*-
0.666666666666667
;
const
double
xi_28
=
f_eq_common
+
xi_25
+
xi_27
;
const
double
xi_29
=
f_eq_common
+
xi_23
+
xi_27
;
const
double
xi_30
=
f_eq_common
+
xi_24
+
xi_27
;
const
double
xi_27
=
f_eq_common
+
rho
*-
0.333333333333333
;
const
double
xi_28
=
xi_25
+
xi_27
;
const
double
xi_29
=
xi_23
+
xi_27
;
const
double
xi_30
=
xi_24
+
xi_27
;
const
double
xi_2
=
xi_24
*
2
+
xi_26
;
const
double
xi_3
=
xi_23
*
2
+
xi_26
;
const
double
xi_4
=
xi_25
*
2
+
xi_26
;
...
...
@@ -183,7 +183,7 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
pdfs
->
swapDataPointers
(
pdfs_tmp
);
...
...
@@ -212,12 +212,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
inner
.
expand
(
-
1
);
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
yMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
inner
.
xMin
()
-
1
,
inner
.
yMin
()
-
1
,
inner
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
inner
.
xMin
()
-
1
,
inner
.
yMin
()
-
1
,
inner
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
inner
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
inner
.
xSize
()
+
2
);
...
...
@@ -230,7 +230,7 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
}
...
...
@@ -284,13 +284,13 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
for
(
auto
&
ci
:
layers
)
{
parallelSection_
.
run
([
&
](
auto
s
)
{
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
ci
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
ci
.
xSize
()
+
2
);
...
...
@@ -303,7 +303,7 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
)),
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
)),
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
int
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
int
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
int
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
dim3
_grid
(
int
((
(
_size_pdfs_0
-
2
)
%
(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
:
(
(
int64_t
)(
_size_pdfs_0
-
2
)
/
(
int64_t
)(((
128
<
_size_pdfs_0
-
2
)
?
128
:
_size_pdfs_0
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_1
-
2
)
%
(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
:
(
(
int64_t
)(
_size_pdfs_1
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_1
-
2
)
?
1
:
_size_pdfs_1
-
2
))
)
+
1
)),
int
((
(
_size_pdfs_2
-
2
)
%
(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
==
0
?
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
:
(
(
int64_t
)(
_size_pdfs_2
-
2
)
/
(
int64_t
)(((
1
<
_size_pdfs_2
-
2
)
?
1
:
_size_pdfs_2
-
2
))
)
+
1
)));
internal_UniformGridGPU_LbKernel
::
UniformGridGPU_LbKernel
<<<
_grid
,
_block
,
0
,
s
>>>
(
_data_pdfs
,
_data_pdfs_tmp
,
_size_pdfs_0
,
_size_pdfs_1
,
_size_pdfs_2
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
omega
);
});
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.h
View file @
00570bba
...
...
@@ -51,6 +51,15 @@ public:
:
pdfsID
(
pdfsID_
),
omega
(
omega_
)
{};
/// Destructor: walks cache_pdfs_ and calls `delete` on every element it holds.
/// NOTE(review): presumably these are temporary fields allocated by the sweep
/// and cached between calls — confirm against the code that fills cache_pdfs_.
~UniformGridGPU_LbKernel()
{
    for (auto cachedField : cache_pdfs_)
    {
        delete cachedField;
    }
}
void
operator
()
(
IBlock
*
block
,
cudaStream_t
stream
=
0
);
void
inner
(
IBlock
*
block
,
cudaStream_t
stream
=
0
);
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
View file @
00570bba
...
...
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs_
m3B5BEDEA5094B12F
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
2227275638DDD757
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
m3B5BEDEA5094B12F
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20_m
2227275638DDD757
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs_
1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20_m
7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -97,7 +97,7 @@ void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaS
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
internal_boundary_UniformGridGPU_NoSlip
::
boundary_UniformGridGPU_NoSlip
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_indexVector
,
_data_pdfs
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
indexVectorSize
);
}
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
View file @
00570bba
This diff is collapsed.
Click to expand it.
apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
View file @
00570bba
...
...
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs_
m3B5BEDEA5094B12F
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
2227275638DDD757
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
m3B5BEDEA5094B12F
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20_m
2227275638DDD757
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs_
1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20_m
7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs_
1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20_m
7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -97,7 +97,7 @@ void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStre
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
const
int64_t
_stride_pdfs_3
=
int64_t
(
pdfs
->
fStride
());
dim3
_block
(
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
int
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
dim3
_grid
(
int
((
(
indexVectorSize
)
%
(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
==
0
?
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
:
(
(
int64_t
)(
indexVectorSize
)
/
(
int64_t
)(((
256
<
indexVectorSize
)
?
256
:
indexVectorSize
))
)
+
1
)),
int
(
1
),
int
(
1
));
internal_boundary_UniformGridGPU_UBB
::
boundary_UniformGridGPU_UBB
<<<
_grid
,
_block
,
0
,
stream
>>>
(
_data_indexVector
,
_data_pdfs
,
_stride_pdfs_0
,
_stride_pdfs_1
,
_stride_pdfs_2
,
_stride_pdfs_3
,
indexVectorSize
);
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment