Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Florian Weik
waLBerla
Commits
b88d44d4
Commit
b88d44d4
authored
Jan 22, 2019
by
Martin Bauer
Browse files
Warning fixes in GPU communication & benchmark
parent
3830bfa6
Changes
10
Hide whitespace changes
Inline
Side-by-side
apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.cpp
View file @
b88d44d4
...
...
@@ -30,6 +30,21 @@
#define FUNC_PREFIX
#ifdef WALBERLA_CXX_COMPILER_IS_GNU
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wshadow"
#endif
#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wshadow"
#endif
using
namespace
std
;
namespace
walberla
{
...
...
@@ -458,15 +473,15 @@ void UniformGridGPU_LatticeModel::Sweep::streamCollide( IBlock * block, const ui
auto
&
omega
=
lm
.
omega
;
WALBERLA_ASSERT_GREATER_EQUAL
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
);
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
@@ -486,13 +501,13 @@ void UniformGridGPU_LatticeModel::Sweep::collide( IBlock * block, const uint_t n
auto
&
omega
=
lm
.
omega
;
WALBERLA_ASSERT_GREATER_EQUAL
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
double
*
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
@@ -519,15 +534,15 @@ void UniformGridGPU_LatticeModel::Sweep::stream( IBlock * block, const uint_t nu
WALBERLA_ASSERT_GREATER_EQUAL
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
);
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
pdfs
->
xSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
pdfs
->
ySize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
pdfs
->
zSize
()
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
cell_idx_c
(
numberOfGhostLayersToInclude
)
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
*
cell_idx_c
(
numberOfGhostLayersToInclude
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
@@ -569,3 +584,11 @@ mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::lbm::UniformGri
}
// namespace mpi
}
// namespace walberla
#ifdef WALBERLA_CXX_COMPILER_IS_GNU
#pragma GCC diagnostic pop
#endif
#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
#pragma clang diagnostic pop
#endif
\ No newline at end of file
apps/benchmarks/UniformGridGPU/UniformGridGPU_LatticeModel.h
View file @
b88d44d4
...
...
@@ -50,6 +50,11 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
...
...
@@ -734,4 +739,8 @@ struct ShearRate<UniformGridGPU_LatticeModel>
#ifdef WALBERLA_CXX_COMPILER_IS_GNU
#pragma GCC diagnostic pop
#endif
#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
#pragma clang diagnostic pop
#endif
\ No newline at end of file
apps/benchmarks/UniformGridGPU/UniformGridGPU_LbKernel.cu
View file @
b88d44d4
...
...
@@ -169,15 +169,15 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream
}
WALBERLA_ASSERT_GREATER_EQUAL
(
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
1
,
-
1
,
-
1
,
0
);
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
1
,
-
1
,
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
pdfs
->
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
pdfs
->
xSize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
pdfs
->
ySize
()
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
pdfs
->
ySize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
pdfs
->
zSize
()
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
pdfs
->
zSize
()
+
2
);
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
-
1
,
0
,
0
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
pdfs
->
xSize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
pdfs
->
ySize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
pdfs
->
zSize
()
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
@@ -219,12 +219,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
inner
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
inner
.
xMin
()
-
1
,
inner
.
yMin
()
-
1
,
inner
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
inner
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
inner
.
xSize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
inner
.
ySize
()
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
inner
.
ySize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
inner
.
zSize
()
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
inner
.
zSize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
inner
.
xSize
()
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
inner
.
xSize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
inner
.
ySize
()
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
inner
.
ySize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
inner
.
zSize
()
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
inner
.
zSize
()
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
@@ -284,20 +284,20 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
for
(
auto
&
ci
:
layers
)
{
parallelSection_
.
run
([
&
](
auto
s
)
{
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs
->
nrOfGhostLayers
()));
double
*
const
_data_pdfs
=
pdfs
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
yMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
zMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
WALBERLA_ASSERT_GREATER_EQUAL
(
ci
.
xMin
()
-
1
,
-
int_c
(
pdfs_tmp
->
nrOfGhostLayers
()));
double
*
_data_pdfs_tmp
=
pdfs_tmp
->
dataAt
(
ci
.
xMin
()
-
1
,
ci
.
yMin
()
-
1
,
ci
.
zMin
()
-
1
,
0
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
ci
.
xSize
()
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
ci
.
xSize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
ci
.
ySize
()
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
ci
.
ySize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
ci
.
zSize
()
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
ci
.
zSize
()
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
xSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
ci
.
xSize
()
)
+
2
));
const
int64_t
_size_pdfs_0
=
int64_t
(
cell_idx_c
(
ci
.
xSize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
ySizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
ci
.
ySize
()
)
+
2
));
const
int64_t
_size_pdfs_1
=
int64_t
(
cell_idx_c
(
ci
.
ySize
()
)
+
2
);
WALBERLA_ASSERT_GREATER_EQUAL
(
pdfs
->
zSizeWithGhostLayer
(),
int64_t
(
cell_idx_c
(
ci
.
zSize
()
)
+
2
));
const
int64_t
_size_pdfs_2
=
int64_t
(
cell_idx_c
(
ci
.
zSize
()
)
+
2
);
const
int64_t
_stride_pdfs_0
=
int64_t
(
pdfs
->
xStride
());
const
int64_t
_stride_pdfs_1
=
int64_t
(
pdfs
->
yStride
());
const
int64_t
_stride_pdfs_2
=
int64_t
(
pdfs
->
zStride
());
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.cu
View file @
b88d44d4
...
...
@@ -40,6 +40,12 @@ namespace lbm {
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#ifdef __CUDACC__
#pragma push
#pragma diag_suppress = declared_but_not_referenced
#endif
namespace
internal_boundary_UniformGridGPU_NoSlip
{
static
FUNC_PREFIX
void
boundary_UniformGridGPU_NoSlip
(
uint8_t
*
const
_data_indexVector
,
double
*
_data_pdfs
,
int64_t
const
_stride_pdfs_0
,
int64_t
const
_stride_pdfs_1
,
int64_t
const
_stride_pdfs_2
,
int64_t
const
_stride_pdfs_3
,
int64_t
indexVectorSize
)
{
...
...
@@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs
_1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20
_m7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs
_1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20
_m7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs
f9cc34cc4e2b6261
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20
11ac6bf6446d4afa
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs
f9cc34cc4e2b6261
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
_data_pdfs_10_20
11ac6bf6446d4afa
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
#pragma GCC diagnostic pop
#endif
#ifdef __CUDACC__
#pragma pop
#endif
void
UniformGridGPU_NoSlip
::
run
(
IBlock
*
block
,
IndexVectors
::
Type
type
,
cudaStream_t
stream
)
{
...
...
@@ -118,4 +128,5 @@ void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream )
}
// namespace lbm
}
// namespace walberla
\ No newline at end of file
}
// namespace walberla
apps/benchmarks/UniformGridGPU/UniformGridGPU_NoSlip.h
View file @
b88d44d4
...
...
@@ -87,7 +87,7 @@ public:
void
syncGPU
()
{
gpuVectors_
.
resize
(
cpuVectors_
.
size
()
);
for
(
in
t
i
=
0
;
i
<
NUM_TYPES
;
++
i
)
for
(
size_
t
i
=
0
;
i
<
size_t
(
NUM_TYPES
)
;
++
i
)
{
auto
&
gpuVec
=
gpuVectors_
[
i
];
auto
&
cpuVec
=
cpuVectors_
[
i
];
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.cu
View file @
b88d44d4
This source diff could not be displayed because it is too large. You can
view the blob
instead.
apps/benchmarks/UniformGridGPU/UniformGridGPU_PackInfo.h
View file @
b88d44d4
...
...
@@ -19,7 +19,7 @@ public:
UniformGridGPU_PackInfo
(
BlockDataID
pdfsID_
)
:
pdfsID
(
pdfsID_
)
{};
virtual
~
UniformGridGPU_PackInfo
()
{}
virtual
void
pack
(
stencil
::
Direction
dir
,
unsigned
char
*
buffer
,
IBlock
*
block
,
cudaStream_t
stream
);
virtual
void
unpack
(
stencil
::
Direction
dir
,
unsigned
char
*
buffer
,
IBlock
*
block
,
cudaStream_t
stream
);
...
...
apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.cu
View file @
b88d44d4
...
...
@@ -40,6 +40,12 @@ namespace lbm {
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#ifdef __CUDACC__
#pragma push
#pragma diag_suppress = declared_but_not_referenced
#endif
namespace
internal_boundary_UniformGridGPU_UBB
{
static
FUNC_PREFIX
void
boundary_UniformGridGPU_UBB
(
uint8_t
*
const
_data_indexVector
,
double
*
_data_pdfs
,
int64_t
const
_stride_pdfs_0
,
int64_t
const
_stride_pdfs_1
,
int64_t
const
_stride_pdfs_2
,
int64_t
const
_stride_pdfs_3
,
int64_t
indexVectorSize
)
{
...
...
@@ -63,9 +69,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
uint8_t
*
const
_data_indexVector_112
=
_data_indexVector
+
12
;
const
int32_t
dir
=
*
((
int32_t
*
)(
&
_data_indexVector_112
[
16
*
blockDim
.
x
*
blockIdx
.
x
+
16
*
threadIdx
.
x
]));
double
*
_data_pdfs
_1ACA00C755A3ABE3
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20
_m7D57D887F63BE1DF
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs
_1ACA00C755A3ABE3
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20
_m7D57D887F63BE1DF
[
_stride_pdfs_0
*
x
];
double
*
_data_pdfs
f9cc34cc4e2b6261
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_1
*
cy
[
dir
]
+
_stride_pdfs_2
*
z
+
_stride_pdfs_2
*
cz
[
dir
]
+
_stride_pdfs_3
*
invdir
[
dir
];
double
*
_data_pdfs_10_20
11ac6bf6446d4afa
=
_data_pdfs
+
_stride_pdfs_1
*
y
+
_stride_pdfs_2
*
z
+
_stride_pdfs_3
*
dir
;
_data_pdfs
f9cc34cc4e2b6261
[
_stride_pdfs_0
*
x
+
_stride_pdfs_0
*
cx
[
dir
]]
=
-
0.30000000000000004
*
cx
[
dir
]
*
weights
[
dir
]
+
_data_pdfs_10_20
11ac6bf6446d4afa
[
_stride_pdfs_0
*
x
];
}
}
}
...
...
@@ -74,6 +80,10 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
#pragma GCC diagnostic pop
#endif
#ifdef __CUDACC__
#pragma pop
#endif
void
UniformGridGPU_UBB
::
run
(
IBlock
*
block
,
IndexVectors
::
Type
type
,
cudaStream_t
stream
)
{
...
...
@@ -118,4 +128,5 @@ void UniformGridGPU_UBB::outer( IBlock * block, cudaStream_t stream )
}
// namespace lbm
}
// namespace walberla
\ No newline at end of file
}
// namespace walberla
apps/benchmarks/UniformGridGPU/UniformGridGPU_UBB.h
View file @
b88d44d4
...
...
@@ -87,7 +87,7 @@ public:
void
syncGPU
()
{
gpuVectors_
.
resize
(
cpuVectors_
.
size
()
);
for
(
in
t
i
=
0
;
i
<
NUM_TYPES
;
++
i
)
for
(
size_
t
i
=
0
;
i
<
size_t
(
NUM_TYPES
)
;
++
i
)
{
auto
&
gpuVec
=
gpuVectors_
[
i
];
auto
&
cpuVec
=
cpuVectors_
[
i
];
...
...
src/cuda/communication/UniformGPUScheme.impl.h
View file @
b88d44d4
...
...
@@ -223,8 +223,8 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr_wrapper <StructuredBlockFo
bufferSystemGPU_
.
setReceiverInfo
(
receiverInfo
);
for
(
auto
it
:
receiverInfo
)
{
bufferSystemCPU_
.
sendBuffer
(
it
.
first
).
resize
(
it
.
second
);
bufferSystemGPU_
.
sendBuffer
(
it
.
first
).
resize
(
it
.
second
);
bufferSystemCPU_
.
sendBuffer
(
it
.
first
).
resize
(
size_t
(
it
.
second
)
);
bufferSystemGPU_
.
sendBuffer
(
it
.
first
).
resize
(
size_t
(
it
.
second
)
);
}
forestModificationStamp_
=
forest
->
getBlockForest
().
getModificationStamp
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment