Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Florian Weik
waLBerla
Commits
427a8c68
Commit
427a8c68
authored
Oct 31, 2018
by
Martin Bauer
Browse files
Refactored test for GPU communication
- comparison to old CPU transfer communication included
parent
ff97334f
Changes
2
Hide whitespace changes
Inline
Side-by-side
tests/cuda/codegen/EquivalenceTest.cpp
View file @
427a8c68
...
...
@@ -6,7 +6,6 @@
#include
"field/FlagField.h"
#include
"field/AddToStorage.h"
#include
"lbm/communication/PdfFieldPackInfo.h"
#include
"lbm/vtk/VTKOutput.h"
#include
"lbm/PerformanceLogger.h"
#include
"blockforest/communication/UniformBufferedScheme.h"
#include
"timeloop/all.h"
...
...
@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID
auto
globalZ
=
real_c
(
offset
[
2
]
+
z
);
auto
xArg
=
real_c
(
std
::
sin
(
real_c
(
globalX
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
0
))
));
auto
zArg
=
real_c
(
std
::
sin
(
real_c
(
globalZ
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
2
))
));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
0.05
*
std
::
sin
(
xArg
),
0
,
0.05
*
std
::
cos
(
zArg
)));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
real_t
(
0.05
)
*
std
::
sin
(
xArg
),
0
,
real_t
(
0.05
)
*
std
::
cos
(
zArg
)));
);
}
}
...
...
@@ -82,6 +82,7 @@ int main( int argc, char **argv )
BlockDataID
flagFieldId
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"flag field"
);
const
FlagUID
fluidFlagUID
(
"Fluid"
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldId
,
fluidFlagUID
);
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
// Part 1 : Native walberla
...
...
@@ -99,7 +100,6 @@ int main( int argc, char **argv )
// Part 2: Generated CPU Version
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
BlockDataID
pdfFieldGeneratedId
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"pdfGenerated"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfFieldGeneratedId
);
CpuCommScheme_T
cpuComm
(
blocks
);
...
...
@@ -113,6 +113,7 @@ int main( int argc, char **argv )
// Part 3: Generated GPU Version
bool
overlapCommunication
=
parameters
.
getParameter
<
bool
>
(
"overlapCommunication"
,
true
);
bool
cudaEnabledMPI
=
parameters
.
getParameter
<
bool
>
(
"cudaEnabledMPI"
,
false
);
bool
oldCommunication
=
parameters
.
getParameter
<
bool
>
(
"oldCommunication"
,
false
);
BlockDataID
pdfShadowCPU
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"cpu shadow field"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfShadowCPU
);
...
...
@@ -126,10 +127,23 @@ int main( int argc, char **argv )
gpuComm
.
addPackInfo
(
make_shared
<
pystencils
::
EquivalenceTest_GPUPackInfo
>
(
pdfGpuFieldId
));
auto
runCommunication
=
[
&
]()
{
gpuComm
();
};
CpuCommScheme_T
oldGpuScheme
(
blocks
);
std
::
vector
<
cudaStream_t
>
streams
;
for
(
uint_t
i
=
0
;
i
<
Stencil_T
::
Size
;
++
i
)
{
cudaStream_t
s
;
cudaStreamCreate
(
&
s
);
streams
.
push_back
(
s
);
}
using
OldPackInfo
=
cuda
::
communication
::
GPUPackInfo
<
cuda
::
GPUField
<
real_t
>
>
;
oldGpuScheme
.
addPackInfo
(
make_shared
<
OldPackInfo
>
(
pdfGpuFieldId
,
streams
)
);
SweepTimeloop
gpuTimeLoop
(
blocks
->
getBlockStorage
(),
timesteps
);
if
(
!
overlapCommunication
)
{
gpuTimeLoop
.
add
()
<<
BeforeFunction
(
runCommunication
,
"gpu communication"
)
gpuTimeLoop
.
add
()
<<
(
oldCommunication
?
BeforeFunction
(
oldGpuScheme
)
:
BeforeFunction
(
runCommunication
,
"gpu communication"
))
<<
Sweep
(
cudaLbKernel
,
"LB stream & collide gpu"
);
}
else
...
...
tests/cuda/codegen/EquivalenceTest.gen.py
View file @
427a8c68
...
...
@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files
from
lbmpy.creationfunctions
import
create_lb_update_rule
from
pystencils_walberla.sweep
import
Sweep
dtype
=
'float64'
# LB options
options
=
{
'method'
:
'srt'
,
...
...
@@ -12,14 +14,14 @@ options = {
'compressible'
:
False
,
'maxwellian_moments'
:
False
,
'temporary_field_name'
:
'pdfs_tmp'
,
'optimization'
:
{
'cse_global'
:
Fals
e
,
'cse_pdfs'
:
Fals
e
,
'double_precision'
:
True
}
'optimization'
:
{
'cse_global'
:
Tru
e
,
'cse_pdfs'
:
Tru
e
,
'double_precision'
:
dtype
==
'float64'
}
}
# GPU optimization options
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
2
,
1
)},
'data_type'
:
'float64'
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
'float64'
}
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
1
,
1
)},
'data_type'
:
dtype
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
dtype
}
def
lb_assignments
():
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment