Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Houman Mirzaalian Dastjerdi
waLBerla
Commits
3bec9d9a
Commit
3bec9d9a
authored
Jun 28, 2018
by
Houman Mirzaalian Dastjerdi
Browse files
add CUDA_ComplexGeometry
-Also modify List.h
parent
e5a2a091
Changes
8
Hide whitespace changes
Inline
Side-by-side
apps/benchmarks/CMakeLists.txt
View file @
3bec9d9a
add_subdirectory
(
ComplexGeometry
)
add_subdirectory
(
ComplexGeometryList
)
add_subdirectory
(
CUDA_ComplexGeometryList
)
add_subdirectory
(
MeshDistance
)
add_subdirectory
(
CouetteFlow
)
add_subdirectory
(
ForcesOnSphereNearPlaneInShearFlow
)
...
...
apps/benchmarks/CUDA_ComplexGeometryList/CMakeLists.txt
0 → 100644
View file @
3bec9d9a
execute_process
(
COMMAND
${
CMAKE_COMMAND
}
-E create_symlink
${
CMAKE_CURRENT_SOURCE_DIR
}
/input.dat
${
CMAKE_CURRENT_BINARY_DIR
}
/input.dat
)
waLBerla_add_executable
(
NAME CUDA_ComplexGeometryList
FILES CUDA_ComplexGeometryList.cpp CUDA_ComplexGeometryList.h
DEPENDS blockforest boundary core domain_decomposition field geometry mesh lbm postprocessing timeloop vtk simd cuda gui
)
apps/benchmarks/CUDA_ComplexGeometryList/CUDA_ComplexGeometryList.cpp
0 → 100644
View file @
3bec9d9a
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file ComplexGeometryList.cpp
//! \author Christian Godenschwager <christian.godenschwager@fau.de>
//
//======================================================================================================================
#include
"blockforest/communication/UniformBufferedScheme.h"
#include
"core/Abort.h"
#include
"core/Environment.h"
#include
"core/config/Config.h"
#include
"core/mpi/MPIManager.h"
#include
"core/timing/RemainingTimeLogger.h"
#include
"lbm/lattice_model/CollisionModel.h"
#include
"lbm/lattice_model/D3Q19.h"
#include
"lbm/list/CellCounters.h"
#include
"lbm/list/List.h"
#include
"lbm/list/ListCommunication.h"
#include
"lbm/list/ListKernel.h"
#include
"lbm/list/ListPressureBoundary.h"
#include
"lbm/list/ListVTK.h"
#include
"lbm/PerformanceEvaluation.h"
#include
"lbm/BlockForestEvaluation.h"
#include
"mesh/TriangleMeshes.h"
#include
"mesh/MeshOperations.h"
#include
"mesh/DistanceComputations.h"
#include
"mesh/MeshIO.h"
#include
"mesh/MatrixVectorOperations.h"
#include
"mesh/blockforest/BlockForestInitialization.h"
#include
"mesh/blockforest/BlockWorkloadMemory.h"
#include
"mesh/blockforest/BlockExclusion.h"
#include
"mesh/blockforest/RefinementSelection.h"
#include
"mesh/distance_octree/DistanceOctree.h"
#include
"mesh/boundary/BoundarySetup.h"
#include
"mesh/boundary/BoundaryInfo.h"
#include
"mesh/boundary/BoundaryLocation.h"
#include
"mesh/boundary/BoundaryUIDFaceDataSource.h"
#include
"mesh/boundary/ColorToBoundaryMapper.h"
#include
"mesh/vtk/VTKMeshWriter.h"
#include
"mesh/vtk/CommonDataSources.h"
#include
"timeloop/SweepTimeloop.h"
#include
"vtk/Initialization.h"
#include
"vtk/VTKOutput.h"
#include
"CUDA_ComplexGeometryList.h"
namespace
complex_geometry
{
using
namespace
walberla
;
using
namespace
cuda
;
template
<
typename
MeshType
>
void
vertexToFaceColor
(
MeshType
&
mesh
,
const
typename
MeshType
::
Color
&
defaultColor
)
{
WALBERLA_CHECK
(
mesh
.
has_vertex_colors
()
);
mesh
.
request_face_colors
();
for
(
auto
faceIt
=
mesh
.
faces_begin
();
faceIt
!=
mesh
.
faces_end
();
++
faceIt
)
{
typename
MeshType
::
Color
vertexColor
;
bool
useVertexColor
=
true
;
auto
vertexIt
=
mesh
.
fv_iter
(
*
faceIt
);
WALBERLA_ASSERT
(
vertexIt
.
is_valid
()
);
vertexColor
=
mesh
.
color
(
*
vertexIt
);
++
vertexIt
;
while
(
vertexIt
.
is_valid
()
&&
useVertexColor
)
{
if
(
vertexColor
!=
mesh
.
color
(
*
vertexIt
)
)
useVertexColor
=
false
;
++
vertexIt
;
}
mesh
.
set_color
(
*
faceIt
,
useVertexColor
?
vertexColor
:
defaultColor
);
}
}
template
<
typename
MeshDistanceType
,
typename
MeshType
>
struct
BoundaryLocationFunction
{
BoundaryLocationFunction
(
const
shared_ptr
<
MeshDistanceType
>
&
meshDistanceObject
,
const
shared_ptr
<
mesh
::
BoundaryLocation
<
MeshType
>
>
&
boundaryLocation
)
:
meshDistanceObject_
(
meshDistanceObject
),
boundaryLocation_
(
boundaryLocation
)
{
}
inline
const
mesh
::
BoundaryInfo
&
operator
()(
const
Vector3
<
real_t
>
&
p
)
const
{
typename
MeshType
::
FaceHandle
fh
;
meshDistanceObject_
->
sqSignedDistance
(
mesh
::
toOpenMesh
(
p
),
fh
);
return
(
*
boundaryLocation_
)[
fh
];
}
shared_ptr
<
MeshDistanceType
>
meshDistanceObject_
;
shared_ptr
<
mesh
::
BoundaryLocation
<
MeshType
>
>
boundaryLocation_
;
};
template
<
typename
MeshDistanceType
,
typename
MeshType
>
inline
BoundaryLocationFunction
<
MeshDistanceType
,
MeshType
>
makeBoundaryLocationFunction
(
const
shared_ptr
<
MeshDistanceType
>
&
meshDistanceObject
,
const
shared_ptr
<
mesh
::
BoundaryLocation
<
MeshType
>
>
&
boundaryLocation
)
{
return
BoundaryLocationFunction
<
MeshDistanceType
,
MeshType
>
(
meshDistanceObject
,
boundaryLocation
);
}
template
<
typename
LBMList
>
class
MyLBMListVTKOutput
{
public:
MyLBMListVTKOutput
(
const
ConstBlockDataID
&
pdfList
)
:
pdfList_
(
pdfList
)
{}
void
operator
()(
std
::
vector
<
shared_ptr
<
vtk
::
BlockCellDataWriterInterface
>
>
&
writers
,
std
::
map
<
std
::
string
,
vtk
::
VTKOutput
::
CellFilter
>
&
filters
,
std
::
map
<
std
::
string
,
vtk
::
VTKOutput
::
BeforeFunction
>
&
/*beforeFunctions*/
)
{
writers
.
push_back
(
make_shared
<
lbm
::
ListVelocityVTKWriter
<
LBMList
,
float
>
>
(
pdfList_
,
"VelocityFromPDF"
)
);
writers
.
push_back
(
make_shared
<
lbm
::
ListDensityVTKWriter
<
LBMList
,
float
>
>
(
pdfList_
,
"DensityFromPDF"
)
);
writers
.
push_back
(
make_shared
<
lbm
::
ListPDFVTKWriter
<
LBMList
,
float
>
>
(
pdfList_
,
"PDFs"
)
);
filters
[
"FluidFilter"
]
=
lbm
::
ListFluidFilter
<
LBMList
>
(
pdfList_
);
}
private:
const
ConstBlockDataID
pdfList_
;
};
// class MyLBMListVTKOutput
template
<
typename
MeshDistanceType
>
struct
MeshDistanceFunction
{
MeshDistanceFunction
(
const
shared_ptr
<
MeshDistanceType
>
&
meshDistanceObject
)
:
meshDistanceObject_
(
meshDistanceObject
)
{
}
inline
real_t
operator
()(
const
Vector3
<
real_t
>
&
p
)
const
{
return
real_c
(
meshDistanceObject_
->
sqSignedDistance
(
mesh
::
toOpenMesh
(
p
)
)
);
}
shared_ptr
<
MeshDistanceType
>
meshDistanceObject_
;
};
template
<
typename
MeshDistanceType
>
inline
MeshDistanceFunction
<
MeshDistanceType
>
makeMeshDistanceFunction
(
const
shared_ptr
<
MeshDistanceType
>
&
meshDistanceObject
)
{
return
MeshDistanceFunction
<
MeshDistanceType
>
(
meshDistanceObject
);
}
int
main
(
int
argc
,
char
**
argv
)
{
Environment
env
(
argc
,
argv
);
mpi
::
MPIManager
::
instance
()
->
useWorldComm
();
WALBERLA_CHECK_NOT_NULLPTR
(
env
.
config
(),
"You have to specify a config file!"
);
Config
::
BlockHandle
configBlock
=
env
.
config
()
->
getBlock
(
"ComplexGeometry"
);
if
(
!
configBlock
)
WALBERLA_ABORT
(
"You have to specify a
\"
ComplexGeometry
\"
block in the configuration file!"
);
const
real_t
omega
=
configBlock
.
getParameter
<
real_t
>
(
"omega"
,
real_t
(
1.4
)
);
const
real_t
dx
=
configBlock
.
getParameter
<
real_t
>
(
"dx"
);
const
Vector3
<
uint_t
>
blockSize
=
configBlock
.
getParameter
<
Vector3
<
uint_t
>
>
(
"blockSize"
);
const
uint_t
timeSteps
=
configBlock
.
getParameter
<
uint_t
>
(
"timeSteps"
);
const
std
::
string
meshFile
=
configBlock
.
getParameter
<
std
::
string
>
(
"meshFile"
);
const
real_t
deltaPressure
=
configBlock
.
getParameter
<
real_t
>
(
"deltaPressure"
);
const
uint_t
perfLoggerInterval
=
configBlock
.
getParameter
<
uint_t
>
(
"perfLoggerInterval"
,
uint_t
(
100
)
);
// Setup LatticeModel
typedef
lbm
::
D3Q19
<
lbm
::
collision_model
::
TRT
,
false
>
MyLatticeModel
;
MyLatticeModel
latticeModel
(
lbm
::
collision_model
::
TRT
::
constructWithMagicNumber
(
omega
)
);
// Load mesh and build DistanceOctree
auto
mesh
=
make_shared
<
mesh
::
TriangleMesh
>
();
mesh
->
request_vertex_colors
();
WALBERLA_LOG_DEVEL_ON_ROOT
(
"Loading mesh"
);
mesh
::
readAndBroadcast
(
meshFile
,
*
mesh
);
vertexToFaceColor
(
*
mesh
,
mesh
::
TriangleMesh
::
Color
(
255
,
255
,
255
)
);
WALBERLA_LOG_DEVEL_ON_ROOT
(
"Adding distance info to mesh"
);
auto
triDist
=
make_shared
<
mesh
::
TriangleDistance
<
mesh
::
TriangleMesh
>
>
(
mesh
);
WALBERLA_LOG_DEVEL_ON_ROOT
(
"Building distance octree"
);
auto
distanceOctree
=
make_shared
<
mesh
::
DistanceOctree
<
mesh
::
TriangleMesh
>
>
(
triDist
);
WALBERLA_LOG_DEVEL_ON_ROOT
(
"done. Octree has height "
<<
distanceOctree
->
height
()
);
// Setup Blockforest
mesh
::
ComplexGeometryStructuredBlockforestCreator
bfc
(
computeAABB
(
*
mesh
),
Vector3
<
real_t
>
(
dx
),
mesh
::
makeExcludeMeshInterior
(
distanceOctree
,
dx
)
);
auto
meshWorkloadMemory
=
mesh
::
makeMeshWorkloadMemory
(
distanceOctree
,
dx
);
meshWorkloadMemory
.
setInsideCellWorkload
(
1
);
meshWorkloadMemory
.
setOutsideCellWorkload
(
0
);
bfc
.
setWorkloadMemorySUIDAssignmentFunction
(
meshWorkloadMemory
);
auto
blocks
=
bfc
.
createStructuredBlockForest
(
blockSize
);
// Setup block data
typedef
lbm
::
List
<
MyLatticeModel
,
lbm
::
LayoutSoA
<
MyLatticeModel
::
Stencil
>
>
MyList
;
BlockDataID
pdfListId
=
lbm
::
addListToStorage
<
MyList
>
(
blocks
,
"LBM list (FIdx)"
,
latticeModel
);
BlockDataID
inflowPressureBoundaryHandling
=
lbm
::
addListPressureBoundaryToStorage
<
MyList
>
(
pdfListId
,
blocks
,
"inflowBoundaryHandling"
);
BlockDataID
outflowPressureBoundaryHandling
=
lbm
::
addListPressureBoundaryToStorage
<
MyList
>
(
pdfListId
,
blocks
,
"outflowBoundaryHandling"
);
// CUDA
BlockDataID
gpu_PdfListSrcID
=
addGPUListToStorage
<
MyList
,
ListOnGpu
<
uint32_t
>
>
(
blocks
,
pdfListId
,
"GPU Field Src"
);
copyToGpu
<
MyList
,
ListOnGpu
<
uint32_t
>
>
(
*
blocks
,
pdfListId
,
gpu_PdfListSrcID
);
// Initialize block data
mesh
::
BoundarySetup
bs
(
blocks
,
makeMeshDistanceFunction
(
distanceOctree
),
uint_t
(
1
)
);
BoundaryUID
noslipUID
(
"noslip"
);
BoundaryUID
inflowUID
(
"inflowBoundaryHandling"
);
BoundaryUID
outflowUID
(
"outflowBoundaryHandling"
);
// attach boundary infos to mesh
mesh
::
ColorToBoundaryMapper
<
mesh
::
TriangleMesh
>
colorToBoundryMapper
((
mesh
::
BoundaryInfo
(
noslipUID
)
));
colorToBoundryMapper
.
set
(
mesh
::
TriangleMesh
::
Color
(
255
,
0
,
0
),
mesh
::
BoundaryInfo
(
inflowUID
)
);
colorToBoundryMapper
.
set
(
mesh
::
TriangleMesh
::
Color
(
0
,
0
,
255
),
mesh
::
BoundaryInfo
(
outflowUID
)
);
colorToBoundryMapper
.
set
(
mesh
::
TriangleMesh
::
Color
(
255
,
255
,
255
),
mesh
::
BoundaryInfo
(
noslipUID
)
);
auto
boundaryLocations
=
colorToBoundryMapper
.
addBoundaryInfoToMesh
(
*
mesh
);
for
(
auto
&
block
:
*
blocks
)
{
MyList
*
lbmList
=
block
.
getData
<
MyList
>
(
pdfListId
);
lbm
::
ListPressureBoundary
<
MyList
>
*
inflowPressureBH
=
block
.
getData
<
lbm
::
ListPressureBoundary
<
MyList
>
>
(
inflowPressureBoundaryHandling
);
lbm
::
ListPressureBoundary
<
MyList
>
*
outflowPressureBH
=
block
.
getData
<
lbm
::
ListPressureBoundary
<
MyList
>
>
(
outflowPressureBoundaryHandling
);
WALBERLA_CHECK_NOT_NULLPTR
(
lbmList
);
WALBERLA_CHECK_NOT_NULLPTR
(
inflowPressureBH
);
WALBERLA_CHECK_NOT_NULLPTR
(
outflowPressureBH
);
std
::
vector
<
Cell
>
fluidCells
=
bs
.
getDomainCells
(
block
,
mesh
::
BoundarySetup
::
INSIDE
);
std
::
map
<
boundary
::
BoundaryUID
,
std
::
vector
<
Cell
>
>
boundaryCells
=
bs
.
getBoundaryCells
(
block
,
mesh
::
BoundarySetup
::
OUTSIDE
,
makeBoundaryLocationFunction
(
distanceOctree
,
boundaryLocations
)
);
lbmList
->
init
(
fluidCells
);
inflowPressureBH
->
init
(
lbmList
,
boundaryCells
[
inflowUID
],
real_t
(
1
)
+
deltaPressure
/
real_t
(
2
)
);
outflowPressureBH
->
init
(
lbmList
,
boundaryCells
[
outflowUID
],
real_t
(
1
)
-
deltaPressure
/
real_t
(
2
)
);
}
// Blockforest output
lbm
::
BlockForestEvaluationBase
<
lbm
::
ListCellCounter
,
lbm
::
ListFluidCellCounter
<
MyList
>
>
blockForestEval
(
blocks
,
lbm
::
ListCellCounter
(
blocks
),
lbm
::
ListFluidCellCounter
<
MyList
>
(
blocks
,
pdfListId
)
);
blockForestEval
.
logResultOnRoot
();
// Setup Timeloop
SweepTimeloop
timeloop
(
blocks
->
getBlockStorage
(),
timeSteps
);
// Communication & Boundary Handling
blockforest
::
communication
::
UniformBufferedScheme
<
typename
MyLatticeModel
::
Stencil
>
listScheme
(
blocks
);
listScheme
.
addPackInfo
(
make_shared
<
lbm
::
ListPackInfo
<
MyList
>
>
(
pdfListId
,
blocks
)
);
timeloop
.
add
()
<<
BeforeFunction
(
listScheme
.
getStartCommunicateFunctor
(),
"list communication start"
)
<<
Sweep
(
lbm
::
ListPressureBoundaryHandling
<
MyList
>
(
inflowPressureBoundaryHandling
),
"PressureBoundaryInflow"
);
timeloop
.
add
()
<<
Sweep
(
lbm
::
ListPressureBoundaryHandling
<
MyList
>
(
outflowPressureBoundaryHandling
),
"PressureBoundaryOutflow"
)
<<
AfterFunction
(
listScheme
.
getWaitFunctor
(),
"list communication wait"
);
// LBM Sweep
timeloop
.
add
()
<<
Sweep
(
lbm
::
ListDefaultTRTSweep
<
MyList
>
(
pdfListId
),
"ListSIMD2SplitTRTSweep"
);
//timeloop.add() << Sweep( lbm::ListSIMD2SplitTRTSweep< MyList, 128 >( pdfListId ), "ListSIMD2SplitTRTSweep" );
// VTK Output
MyLBMListVTKOutput
<
MyList
>
myVTKOutput
(
pdfListId
);
std
::
map
<
std
::
string
,
vtk
::
SelectableOutputFunction
>
vtkOutputFunctions
;
vtk
::
initializeVTKOutput
(
vtkOutputFunctions
,
myVTKOutput
,
blocks
,
env
.
config
()
);
for
(
auto
output
=
vtkOutputFunctions
.
begin
();
output
!=
vtkOutputFunctions
.
end
();
++
output
)
timeloop
.
addFuncBeforeTimeStep
(
output
->
second
.
outputFunction
,
std
::
string
(
"VTK: "
)
+
output
->
first
,
output
->
second
.
requiredGlobalStates
,
output
->
second
.
incompatibleGlobalStates
);
// Utilities
lbm
::
PerformanceEvaluationBase
<
lbm
::
ListCellCounter
,
lbm
::
ListFluidCellCounter
<
MyList
>
>
performanceEval
(
blocks
,
lbm
::
ListCellCounter
(
blocks
),
lbm
::
ListFluidCellCounter
<
MyList
>
(
blocks
,
pdfListId
)
);
WcTimer
performanceTimer
;
timeloop
.
addFuncAfterTimeStep
(
[
&
]()
{
if
(
timeloop
.
getCurrentTimeStep
()
==
0
)
{
performanceTimer
.
start
();
}
else
if
(
timeloop
.
getCurrentTimeStep
()
%
perfLoggerInterval
==
0
)
{
performanceTimer
.
end
();
performanceEval
.
logResultOnRoot
(
perfLoggerInterval
,
performanceTimer
.
last
()
);
performanceTimer
.
start
();
}
},
"PerformanceEvaluation"
);
timeloop
.
addFuncAfterTimeStep
(
timing
::
RemainingTimeLogger
(
timeloop
.
getNrOfTimeSteps
()
)
);
// Run timeloop
WcTimingPool
timingPool
;
timeloop
.
run
(
timingPool
);
timingPool
.
logResultOnRoot
(
walberla
::
WcTimingPool
::
REDUCE_TOTAL
,
true
);
return
EXIT_SUCCESS
;
}
}
// namespace complex_geometry
int
main
(
int
argc
,
char
**
argv
)
{
return
complex_geometry
::
main
(
argc
,
argv
);
}
apps/benchmarks/CUDA_ComplexGeometryList/CUDA_ComplexGeometryList.cu
0 → 100644
View file @
3bec9d9a
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AddGPUListToStorage.h
//! \ingroup cuda
//! \author Houman Mirzaalian D. <houman.mirzaalian@fau.de>
//
//======================================================================================================================
#include
"CUDA_ComplexGeometryList.h"
apps/benchmarks/CUDA_ComplexGeometryList/CUDA_ComplexGeometryList.h
0 → 100644
View file @
3bec9d9a
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AddGPUListToStorage.h
//! \ingroup cuda
//! \author Houman Mirzaalian D. <houman.mirzaalian@fau.de>
//
//======================================================================================================================
#pragma once
#include
"lbm/list/List.h"
#include
"lbm/all.h"
#include
<lbm/lattice_model/LatticeModelBase.h>
#include
"lbm/list/List.h"
#include
"lbm/lattice_model/D3Q19.h"
#include
"domain_decomposition/BlockDataID.h"
#include
"cuda/HostFieldAllocator.h"
#include
"cuda/HostFieldAllocator.h"
#include
"cuda/FieldCopy.h"
#include
"cuda/GPUField.h"
#include
"cuda/Kernel.h"
#include
"cuda/AddGPUFieldToStorage.h"
#include
"cuda/communication/GPUPackInfo.h"
#include
"cuda/FieldIndexing.h"
#include
"CUDA_ComplexGeometryList.h"
#include
<type_traits>
namespace
walberla
{
namespace
cuda
{
using
lbm
::
List
;
using
lbm
::
LayoutSoA
;
typedef
lbm
::
D3Q19
<
lbm
::
collision_model
::
TRT
,
false
>
LatticeModel_T
;
template
<
typename
Index_T
=
walberla
::
uint32_t
>
class
ListOnGpu
{
public:
typedef
Index_T
index_t
;
ListOnGpu
()
:
pdfs_
(
nullptr
),
pullIdxs_
(
nullptr
),
size_
(
0
)
{}
~
ListOnGpu
()
{};
void
resize
(
const
size_t
newSize
)
{
cudaFree
(
pdfs_
);
cudaFree
(
pullIdxs_
);
cudaMalloc
(
&
pdfs_
,
sizeof
(
real_t
)
*
newSize
);
cudaMalloc
(
&
pullIdxs_
,
sizeof
(
Index_T
)
*
newSize
);
//free old ptr and reallocate
}
real_t
*
getPdfData
()
{
return
pdfs_
;
}
const
real_t
*
getPdfData
()
const
{
return
pdfs_
;
}
Index_T
*
getPullIndicesData
()
{
return
pullIdxs_
;
}
const
Index_T
*
getPullIndicesData
()
const
{
return
pullIdxs_
;
}
size_t
getSize
()
const
{
return
size_
;
}
private:
real_t
*
pdfs_
;
Index_T
*
pullIdxs_
;
size_t
size_
;
};
template
<
typename
Index_T
>
bool
operator
==
(
const
ListOnGpu
<
Index_T
>
&
lhs
,
const
ListOnGpu
<
Index_T
>
&
rhs
)
{
return
false
;
}
template
<
typename
ListOnCPU
,
typename
ListOnGPU
>
void
copyToGpu
(
const
ListOnCPU
&
src
,
ListOnGPU
&
dst
)
{
static_assert
(
std
::
is_same
<
typename
ListOnCPU
::
index_t
,
typename
ListOnGPU
::
index_t
>::
value
,
""
);
typedef
typename
ListOnCPU
::
index_t
index_t
;
auto
pdfs_cpu
=
src
.
getPdfData
();
auto
pullIdxs_cpu
=
src
.
getPullIndicesData
();
auto
size_cpu
=
src
.
getSize
();
auto
pdfs_gpu
=
dst
.
getPdfData
();
auto
pullIdxs_gpu
=
dst
.
getPullIndicesData
();
auto
size_gpu
=
dst
.
getSize
();
if
(
size_cpu
!=
size_gpu
){
dst
.
resize
(
size_cpu
);
}
// check if size matches, if not reallocate
cudaMemcpy
(
pdfs_gpu
,
pdfs_cpu
,
sizeof
(
real_t
)
*
size_cpu
,
cudaMemcpyHostToDevice
);
cudaMemcpy
(
pullIdxs_gpu
,
pullIdxs_cpu
,
sizeof
(
index_t
)
*
size_cpu
,
cudaMemcpyHostToDevice
);
// memcopy
}
template
<
typename
ListOnGPU
,
typename
ListOnCPU
>
void
copyToCpu
(
const
ListOnGPU
&
src
,
ListOnCPU
&
dst
)
{
static_assert
(
std
::
is_same
<
typename
ListOnCPU
::
index_t
,
typename
ListOnGPU
::
index_t
>::
value
,
""
);
typedef
typename
ListOnCPU
::
index_t
index_t
;
auto
pdfs_cpu
=
dst
.
getPdfData
();
auto
pullIdxs_cpu
=
dst
.
getPullIndicesData
();
auto
size_cpu
=
dst
.
getSize
();
auto
pdfs_gpu
=
src
.
getPdfData
();
auto
pullIdxs_gpu
=
src
.
getPullIndicesData
();
auto
size_gpu
=
src
.
getSize
();
WALBERLA_CHECK_EQUAL
(
size_cpu
,
size_gpu
,
"size of cpu array is not equal to size of gpu array"
);
cudaMemcpy
(
pdfs_cpu
,
pdfs_gpu
,
sizeof
(
real_t
)
*
size_cpu
,
cudaMemcpyDeviceToHost
);
cudaMemcpy
(
pullIdxs_cpu
,
pullIdxs_gpu
,
sizeof
(
index_t
)
*
size_cpu
,
cudaMemcpyDeviceToHost
);
}
template
<
typename
ListOnCPU
,
typename
ListOnGPU
>
void
copyToGpu
(
StructuredBlockStorage
&
blocks
,
const
BlockDataID
cpuListId
,
const
BlockDataID
gpuListId
)
{
for
(
auto
&
block
:
blocks
)
{
ListOnCPU
*
lbmList
=
block
.
getData
<
ListOnCPU
>
(
cpuListId
);
ListOnGPU
*
gpuList
=
block
.
getData
<
ListOnGPU
>
(
gpuListId
);
copyToGpu
(
*
lbmList
,
*
gpuList
);
}
}
namespace
internal
{
template
<
typename
CpuList_T
,
typename
GpuList_T
>
GpuList_T
*
createGPUListFromCPUList
(
const
IBlock
*
const
block
,
const
StructuredBlockStorage
*
const
,
ConstBlockDataID
cpu_pdfListId
)
{
// static_assert(std::is_same<CpuList_T, GpuList_T>::value, "type error!!");//check that types are compatible
const
CpuList_T
*
list_CPU
=
block
->
getData
<
CpuList_T
>
(
cpu_pdfListId
);
GpuList_T
*
list_GPU
=
new
GpuList_T
();
copyToGpu
(
*
list_CPU
,
*
list_GPU
);
return
list_GPU
;
}
}
template
<
typename
List_T
,
typename
GpuList_T
>
BlockDataID
addGPUListToStorage
(
const
shared_ptr
<
StructuredBlockStorage
>
&
bs
,
ConstBlockDataID
cpu_pdfListId
,
const
std
::
string
&
identifier
)
{
auto
func
=
boost
::
bind
(
internal
::
createGPUListFromCPUList
<
List_T
,
GpuList_T
>
,
_1
,
_2
,
cpu_pdfListId
);
return
bs
->
addStructuredBlockData
<
GpuList_T
>
(
func
,
identifier
);
}
}
// namespace cuda
}
// namespace walberla
\ No newline at end of file
apps/benchmarks/CUDA_ComplexGeometryList/input.conf
0 → 100644
View file @
3bec9d9a
ComplexGeometry
{
omega
1
.
4
;
dx
0
.
3
;
blockSize
<
256
,
256
,
256
>;
timeSteps
10000
;
meshFile
coronary_colored_medium
.
obj
;
deltaPressure
0
.
0001
;
}