Commit 77aed6a0 authored by Martin Bauer

Added generated files to GPU benchmark

parent 997ec0b7
@@ -2,5 +2,7 @@
 waLBerla_link_files_to_builddir( "*.prm" )
 waLBerla_add_executable ( NAME UniformGridBenchmarkGPU
-    FILES UniformGridGPU.cpp UniformGridGPU.gen.py
+    FILES UniformGridGPU.cpp UniformGridGPU_LatticeModel.cpp
+          UniformGridGPU_LbKernel.cu UniformGridGPU_NoSlip.cu UniformGridGPU_UBB.cu
+          UniformGridGPU_PackInfo.cu
     DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk )
@@ -2,7 +2,7 @@
 Parameters
 {
    omega 1.8;
-   timesteps 10;
+   timesteps 1000;
    remainingTimeLoggerFrequency 3;
    vtkWriteFrequency 0;
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridGPU_LbKernel.h
//! \author pystencils
//======================================================================================================================
#include "core/DataTypes.h"
#include "cuda/GPUField.h"
#include "cuda/ParallelStreams.h"
#include "field/SwapableCompare.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include <set>
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
namespace walberla {
namespace pystencils {
class UniformGridGPU_LbKernel
{
public:
UniformGridGPU_LbKernel( BlockDataID pdfsID_, double omega_)
: pdfsID(pdfsID_), omega(omega_)
{};
void operator() ( IBlock * block , cudaStream_t stream = 0 );
void inner( IBlock * block , cudaStream_t stream = 0 );
void outer( IBlock * block , cudaStream_t stream = 0 );
void setOuterPriority(int priority ) {
parallelStreams_.setStreamPriority(priority);
}
private:
BlockDataID pdfsID;
double omega;
std::set< cuda::GPUField<double> *, field::SwapableCompare< cuda::GPUField<double> * > > cache_pdfs_;
cuda::ParallelStreams parallelStreams_;
};
} // namespace pystencils
} // namespace walberla
#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
# pragma GCC diagnostic pop
#endif
\ No newline at end of file
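
The generated sweep class above exposes operator(), inner() and outer() entry points plus a priority for the outer-stream work. As a minimal usage sketch (not part of this commit; the pdfsGpuFieldID, omega and timeloop variables are assumed to exist in the benchmark application), such a kernel might be registered in a waLBerla time loop roughly as follows:

    // sketch under the assumptions above: query CUDA stream priorities and register the generated sweep
    int lowPrio = 0, highPrio = 0;
    cudaDeviceGetStreamPriorityRange( &lowPrio, &highPrio );
    pystencils::UniformGridGPU_LbKernel lbKernel( pdfsGpuFieldID, omega );
    lbKernel.setOuterPriority( highPrio );   // run the outer (boundary-near) part on a high-priority stream
    timeloop.add() << Sweep( [&]( IBlock * b ) { lbKernel( b ); }, "LB stream & collide" );
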
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridGPU_NoSlip.cpp
//! \ingroup lbm
//! \author lbmpy
//======================================================================================================================
#include <cmath>
#include "core/DataTypes.h"
#include "core/Macros.h"
#include "UniformGridGPU_NoSlip.h"
#include "cuda/ErrorChecking.h"
#define FUNC_PREFIX __global__
using namespace std;
namespace walberla {
namespace lbm {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wconversion"
#endif
namespace internal_boundary_UniformGridGPU_NoSlip {
static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_indexVector, double * _data_pdfs, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3, int64_t indexVectorSize)
{
if (blockDim.x*blockIdx.x + threadIdx.x < indexVectorSize)
{
uint8_t * const _data_indexVector_10 = _data_indexVector;
const int32_t x = *((int32_t *)(& _data_indexVector_10[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_14 = _data_indexVector + 4;
const int32_t y = *((int32_t *)(& _data_indexVector_14[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
uint8_t * const _data_indexVector_18 = _data_indexVector + 8;
const int32_t z = *((int32_t *)(& _data_indexVector_18[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
const int64_t cx [] = { 0, 0, 0, -1, 1, 0, 0, -1, 1, -1, 1, 0, 0, -1, 1, 0, 0, -1, 1 };
const int64_t cy [] = { 0, 1, -1, 0, 0, 0, 0, 1, 1, -1, -1, 1, -1, 0, 0, 1, -1, 0, 0 };
const int64_t cz [] = { 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, 1, 1, 1, -1, -1, -1, -1 };
const int invdir [] = { 0, 2, 1, 4, 3, 6, 5, 10, 9, 8, 7, 16, 15, 18, 17, 12, 11, 14, 13 };
const double weights [] = { 0.333333333333333,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0555555555555556,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778,0.0277777777777778 };
uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
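// plain bounce-back: the pdf stored at cell (x,y,z) in direction dir is copied into the
// inverse-direction slot of the neighbouring cell (x,y,z) + c[dir]; here (x,y,z) is a fluid
// cell from the index vector and (x,y,z) + c[dir] its no-slip boundary neighbour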
double * _data_pdfs_m3B5BEDEA5094B12F = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_20_m2227275638DDD757 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfs_m3B5BEDEA5094B12F[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_20_m2227275638DDD757[_stride_pdfs_0*x];
}
}
}
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaStream_t stream )
{
auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
auto pointer = indexVectors->pointerGpu(type);
int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
if( indexVectorSize == 0)
return;
uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
auto pdfs = block->getData< cuda::GPUField<double> >(pdfsID);
WALBERLA_ASSERT_GREATER_EQUAL(0, -int_c(pdfs->nrOfGhostLayers()));
double * _data_pdfs = pdfs->dataAt(0, 0, 0, 0);
const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % int(((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
internal_boundary_UniformGridGPU_NoSlip::boundary_UniformGridGPU_NoSlip<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}
void UniformGridGPU_NoSlip::operator() ( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::ALL, stream );
}
void UniformGridGPU_NoSlip::inner( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::INNER, stream );
}
void UniformGridGPU_NoSlip::outer( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::OUTER, stream );
}
} // namespace lbm
} // namespace walberla
\ No newline at end of file
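
The launch configuration computed at the end of run() above is hard to read in its generated form: it uses min(256, indexVectorSize) threads per block and a ceiling division for the grid size, i.e. one GPU thread per boundary link. An equivalent, more readable formulation (a sketch only, not the generated code; needs <algorithm> for std::min):

    const int64_t threadsPerBlock = std::min( int64_t( 256 ), indexVectorSize );
    const int64_t numBlocks       = ( indexVectorSize + threadsPerBlock - 1 ) / threadsPerBlock;   // ceil division
    dim3 _block( int( threadsPerBlock ), 1, 1 );
    dim3 _grid ( int( numBlocks ), 1, 1 );
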
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridGPU_NoSlip.h
//! \author pystencils
//======================================================================================================================
#include "core/DataTypes.h"
#include "cuda/GPUField.h"
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "blockforest/StructuredBlockForest.h"
#include "field/FlagField.h"
#include <set>
#include <vector>
#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif
namespace walberla {
namespace lbm {
class UniformGridGPU_NoSlip
{
public:
struct IndexInfo {
int32_t x;
int32_t y;
int32_t z;
int32_t dir;
IndexInfo(int32_t x_, int32_t y_, int32_t z_, int32_t dir_) : x(x_), y(y_), z(z_), dir(dir_) {}
bool operator==(const IndexInfo & o) const {
return x == o.x && y == o.y && z == o.z && dir == o.dir;
}
};
class IndexVectors
{
public:
using CpuIndexVector = std::vector<IndexInfo>;
enum Type {
ALL = 0,
INNER = 1,
OUTER = 2,
NUM_TYPES = 3
};
IndexVectors() : cpuVectors_(NUM_TYPES) {}
bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; }
~IndexVectors() {
for( auto & gpuVec: gpuVectors_)
cudaFree( gpuVec );
}
CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
IndexInfo * pointerCpu(Type t) { return &(cpuVectors_[t][0]); }
IndexInfo * pointerGpu(Type t) { return gpuVectors_[t]; }
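// free any previous device copies and upload the current CPU index vectors to the GPU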
void syncGPU()
{
gpuVectors_.resize( cpuVectors_.size() );
for(int i=0; i < NUM_TYPES; ++i )
{
auto & gpuVec = gpuVectors_[i];
auto & cpuVec = cpuVectors_[i];
cudaFree( gpuVec );
cudaMalloc( &gpuVec, sizeof(IndexInfo) * cpuVec.size() );
cudaMemcpy( gpuVec, &cpuVec[0], sizeof(IndexInfo) * cpuVec.size(), cudaMemcpyHostToDevice );
}
}
private:
std::vector<CpuIndexVector> cpuVectors_;
using GpuIndexVector = IndexInfo *;
std::vector<GpuIndexVector> gpuVectors_;
};
UniformGridGPU_NoSlip( const shared_ptr<StructuredBlockForest> & blocks,
BlockDataID pdfsID_ )
: pdfsID(pdfsID_)
{
auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); };
indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_UniformGridGPU_NoSlip");
};
void operator() ( IBlock * block , cudaStream_t stream = 0 );
void inner( IBlock * block , cudaStream_t stream = 0 );
void outer( IBlock * block , cudaStream_t stream = 0 );
template<typename FlagField_T>
void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
FlagUID boundaryFlagUID, FlagUID domainFlagUID)
{
for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID );
}
template<typename FlagField_T>
void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID,
FlagUID boundaryFlagUID, FlagUID domainFlagUID )
{
auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
auto * flagField = block->getData< FlagField_T > ( flagFieldID );
auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
auto domainFlag = flagField->getFlag(domainFlagUID);
auto inner = flagField->xyzSize();
inner.expand( cell_idx_t(-1) );
indexVectorAll.clear();
indexVectorInner.clear();
indexVectorOuter.clear();
for( auto it = flagField->begin(); it != flagField->end(); ++it )
{
if( ! isFlagSet(it, domainFlag) )
continue;
if ( isFlagSet( it.neighbor(0, 0, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 0 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, 1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 1 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, -1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 2 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(-1, 0, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 3 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(1, 0, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 4 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, 0, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 5 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, 0, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 6 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(-1, 1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 7 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(1, 1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 8 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(-1, -1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 9 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(1, -1, 0 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 10 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, 1, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 11 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, -1, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 12 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(-1, 0, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 13 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(1, 0, 1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 14 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, 1, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 15 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(0, -1, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 16 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(-1, 0, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 17 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
if ( isFlagSet( it.neighbor(1, 0, -1 , 0 ), boundaryFlag ) )
{
auto element = IndexInfo(it.x(), it.y(), it.z(), 18 );
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );