Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Florian Weik
waLBerla
Commits
427a8c68
Commit
427a8c68
authored
Oct 31, 2018
by
Martin Bauer
Browse files
Refactored test for GPU communication
- comparison to old CPU transfer communication included
parent
ff97334f
Changes
2
Hide whitespace changes
Inline
Side-by-side
tests/cuda/codegen/EquivalenceTest.cpp
View file @
427a8c68
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
#include "field/FlagField.h"
#include "field/FlagField.h"
#include "field/AddToStorage.h"
#include "field/AddToStorage.h"
#include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/vtk/VTKOutput.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/PerformanceLogger.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h"
#include "timeloop/all.h"
...
@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID
...
@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID
auto
globalZ
=
real_c
(
offset
[
2
]
+
z
);
auto
globalZ
=
real_c
(
offset
[
2
]
+
z
);
auto
xArg
=
real_c
(
std
::
sin
(
real_c
(
globalX
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
0
))
));
auto
xArg
=
real_c
(
std
::
sin
(
real_c
(
globalX
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
0
))
));
auto
zArg
=
real_c
(
std
::
sin
(
real_c
(
globalZ
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
2
))
));
auto
zArg
=
real_c
(
std
::
sin
(
real_c
(
globalZ
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
2
))
));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
0.05
*
std
::
sin
(
xArg
),
0
,
0.05
*
std
::
cos
(
zArg
)));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
real_t
(
0.05
)
*
std
::
sin
(
xArg
),
0
,
real_t
(
0.05
)
*
std
::
cos
(
zArg
)));
);
);
}
}
}
}
...
@@ -82,6 +82,7 @@ int main( int argc, char **argv )
...
@@ -82,6 +82,7 @@ int main( int argc, char **argv )
BlockDataID
flagFieldId
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"flag field"
);
BlockDataID
flagFieldId
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"flag field"
);
const
FlagUID
fluidFlagUID
(
"Fluid"
);
const
FlagUID
fluidFlagUID
(
"Fluid"
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldId
,
fluidFlagUID
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldId
,
fluidFlagUID
);
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
// Part 1 : Native walberla
// Part 1 : Native walberla
...
@@ -99,7 +100,6 @@ int main( int argc, char **argv )
...
@@ -99,7 +100,6 @@ int main( int argc, char **argv )
// Part 2: Generated CPU Version
// Part 2: Generated CPU Version
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
BlockDataID
pdfFieldGeneratedId
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"pdfGenerated"
,
generatedLatticeModel
,
field
::
fzyx
);
BlockDataID
pdfFieldGeneratedId
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"pdfGenerated"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfFieldGeneratedId
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfFieldGeneratedId
);
CpuCommScheme_T
cpuComm
(
blocks
);
CpuCommScheme_T
cpuComm
(
blocks
);
...
@@ -113,6 +113,7 @@ int main( int argc, char **argv )
...
@@ -113,6 +113,7 @@ int main( int argc, char **argv )
// Part 3: Generated GPU Version
// Part 3: Generated GPU Version
bool
overlapCommunication
=
parameters
.
getParameter
<
bool
>
(
"overlapCommunication"
,
true
);
bool
overlapCommunication
=
parameters
.
getParameter
<
bool
>
(
"overlapCommunication"
,
true
);
bool
cudaEnabledMPI
=
parameters
.
getParameter
<
bool
>
(
"cudaEnabledMPI"
,
false
);
bool
cudaEnabledMPI
=
parameters
.
getParameter
<
bool
>
(
"cudaEnabledMPI"
,
false
);
bool
oldCommunication
=
parameters
.
getParameter
<
bool
>
(
"oldCommunication"
,
false
);
BlockDataID
pdfShadowCPU
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"cpu shadow field"
,
generatedLatticeModel
,
field
::
fzyx
);
BlockDataID
pdfShadowCPU
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"cpu shadow field"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfShadowCPU
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfShadowCPU
);
...
@@ -126,10 +127,23 @@ int main( int argc, char **argv )
...
@@ -126,10 +127,23 @@ int main( int argc, char **argv )
gpuComm
.
addPackInfo
(
make_shared
<
pystencils
::
EquivalenceTest_GPUPackInfo
>
(
pdfGpuFieldId
));
gpuComm
.
addPackInfo
(
make_shared
<
pystencils
::
EquivalenceTest_GPUPackInfo
>
(
pdfGpuFieldId
));
auto
runCommunication
=
[
&
]()
{
gpuComm
();
};
auto
runCommunication
=
[
&
]()
{
gpuComm
();
};
CpuCommScheme_T
oldGpuScheme
(
blocks
);
std
::
vector
<
cudaStream_t
>
streams
;
for
(
uint_t
i
=
0
;
i
<
Stencil_T
::
Size
;
++
i
)
{
cudaStream_t
s
;
cudaStreamCreate
(
&
s
);
streams
.
push_back
(
s
);
}
using
OldPackInfo
=
cuda
::
communication
::
GPUPackInfo
<
cuda
::
GPUField
<
real_t
>
>
;
oldGpuScheme
.
addPackInfo
(
make_shared
<
OldPackInfo
>
(
pdfGpuFieldId
,
streams
)
);
SweepTimeloop
gpuTimeLoop
(
blocks
->
getBlockStorage
(),
timesteps
);
SweepTimeloop
gpuTimeLoop
(
blocks
->
getBlockStorage
(),
timesteps
);
if
(
!
overlapCommunication
)
if
(
!
overlapCommunication
)
{
{
gpuTimeLoop
.
add
()
<<
BeforeFunction
(
runCommunication
,
"gpu communication"
)
gpuTimeLoop
.
add
()
<<
(
oldCommunication
?
BeforeFunction
(
oldGpuScheme
)
:
BeforeFunction
(
runCommunication
,
"gpu communication"
))
<<
Sweep
(
cudaLbKernel
,
"LB stream & collide gpu"
);
<<
Sweep
(
cudaLbKernel
,
"LB stream & collide gpu"
);
}
}
else
else
...
...
tests/cuda/codegen/EquivalenceTest.gen.py
View file @
427a8c68
...
@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files
...
@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files
from
lbmpy.creationfunctions
import
create_lb_update_rule
from
lbmpy.creationfunctions
import
create_lb_update_rule
from
pystencils_walberla.sweep
import
Sweep
from
pystencils_walberla.sweep
import
Sweep
dtype
=
'float64'
# LB options
# LB options
options
=
{
options
=
{
'method'
:
'srt'
,
'method'
:
'srt'
,
...
@@ -12,14 +14,14 @@ options = {
...
@@ -12,14 +14,14 @@ options = {
'compressible'
:
False
,
'compressible'
:
False
,
'maxwellian_moments'
:
False
,
'maxwellian_moments'
:
False
,
'temporary_field_name'
:
'pdfs_tmp'
,
'temporary_field_name'
:
'pdfs_tmp'
,
'optimization'
:
{
'cse_global'
:
Fals
e
,
'optimization'
:
{
'cse_global'
:
Tru
e
,
'cse_pdfs'
:
Fals
e
,
'cse_pdfs'
:
Tru
e
,
'double_precision'
:
True
}
'double_precision'
:
dtype
==
'float64'
}
}
}
# GPU optimization options
# GPU optimization options
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
2
,
1
)},
'data_type'
:
'float64'
}
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
1
,
1
)},
'data_type'
:
dtype
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
'float64'
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
dtype
}
def
lb_assignments
():
def
lb_assignments
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment