Commit 424621cf authored by Martin Bauer
Browse files

Worked on waLBerla boundary sweeps generation

parent 7f33dd8f
......@@ -49,6 +49,7 @@ def create_boundary_class(boundary_object, lb_method, double_precision=True, tar
shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1))
kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target)
kernel.function_name = "boundary_" + boundary_object.name
stencil_info = [(i, ", ".join([str(e) for e in d])) for i, d in enumerate(lb_method.stencil)]
......
......@@ -53,15 +53,39 @@ namespace {{namespace}} {
#endif
// run(): executes the generated boundary kernel on one block, restricted to the
// index vector selected by `type`; `stream` is handed to the generated kernel call.
void {{class_name}}::operator() ( IBlock * block )
void {{class_name}}::run( IBlock * block, IndexVectors::Type type, cudaStream_t stream )
{
auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
auto & indexVector = indexVectors->{{target}};
int64_t indexVectorSize = int64_c( indexVectors->cpu.size() );
uint8_t * fd_indexVector = reinterpret_cast<uint8_t*>(&indexVector[0]);
// Pick the raw index data that matches the generation target: the cudaMalloc'ed
// buffer for 'gpu', the host vector's storage otherwise.
// NOTE(review): pointerCpu() evaluates &vec[0] before the size check below, which is
// undefined behavior for an empty vector - consider testing the size first.
{% if target == 'gpu' -%}
auto pointer = indexVectors->pointerGpu(type);
{% else %}
auto pointer = indexVectors->pointerCpu(type);
{% endif %}
// The element count always comes from the host-side vector, also for the 'gpu' target.
int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() );
// Nothing to process when the selected index vector is empty.
if( indexVectorSize == 0)
return;
// The kernel receives the index data as a raw byte pointer.
uint8_t * fd_indexVector = reinterpret_cast<uint8_t*>(pointer);
{{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
{{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'])|indent(4)}}
{{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
}
// Applies the boundary kernel to every recorded entry of the block
// (IndexVectors::ALL); `stream` is forwarded unchanged to run().
void {{class_name}}::operator() ( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::ALL, stream );
}
// Applies the boundary kernel only to the entries stored in the INNER index vector,
// i.e. those fillFromFlagField found inside the flag field's xyz-size interval shrunk
// by one cell; `stream` is forwarded unchanged to run().
void {{class_name}}::inner( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::INNER, stream );
}
// Applies the boundary kernel only to the entries stored in the OUTER index vector,
// i.e. those fillFromFlagField did not classify as inner; `stream` is forwarded
// unchanged to run().
void {{class_name}}::outer( IBlock * block, cudaStream_t stream )
{
run( block, IndexVectors::OUTER, stream );
}
......
......@@ -50,26 +50,58 @@ class {{class_name}}
public:
{{StructDeclaration|indent(4)}}
// Per-block container for the boundary index lists: one host vector per Type and,
// for the 'gpu' target, one device buffer per Type that mirrors it after syncGPU().
struct IndexVectors
class IndexVectors
{
bool operator==(IndexVectors & other) { return cpu == other.cpu; }
public:
using CpuIndexVector = std::vector<{{StructName}}>;
// Selects one of the stored index vectors; the values double as indices into
// cpuVectors_ / gpuVectors_, and NUM_TYPES is the number of vectors kept.
enum Type {
ALL = 0,
INNER = 1,
OUTER = 2,
NUM_TYPES = 3
};
// Creates NUM_TYPES empty host vectors. gpuVectors_ stays empty until syncGPU() runs.
IndexVectors() : cpuVectors_(NUM_TYPES) {}
// Equality compares the host-side vectors only; device buffers are not inspected.
// NOTE(review): neither the parameter nor the member function is const-qualified.
bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; }
std::vector<{{StructName}}> cpu;
{% if target == 'gpu' -%}
{{StructName}} * gpu;
// Releases every device buffer held in gpuVectors_.
~IndexVectors() {
for( auto & gpuVec: gpuVectors_)
cudaFree( gpuVec );
}
{% endif %}
~IndexVectors() { cudaFree(gpu); }
IndexVectors() : gpu(nullptr) {}
// Host vector for the given type (mutable, so callers can fill it).
CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
// Address of the first host element for the given type.
// NOTE(review): indexing [0] on an empty vector is undefined behavior.
{{StructName}} * pointerCpu(Type t) { return &(cpuVectors_[t][0]); }
// Device buffer for the given type.
// NOTE(review): gpuVectors_ is only sized inside syncGPU(); calling this earlier
// indexes an empty vector.
{{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; }
// For the 'gpu' target: re-creates each device buffer from its host vector
// (free old buffer, allocate sizeof(element) * size bytes, copy host to device).
// For other targets the generated body is empty.
// NOTE(review): the results of cudaFree / cudaMalloc / cudaMemcpy are not checked,
// and &cpuVec[0] is taken even when cpuVec is empty.
void syncGPU()
{
cudaFree( gpu );
cudaMalloc( &gpu, sizeof({{StructName}}) * cpu.size() );
cudaMemcpy( gpu, &cpu[0], sizeof({{StructName}}) * cpu.size(), cudaMemcpyHostToDevice );
{% if target == 'gpu' -%}
// resize() value-initializes new slots, so first-time entries are null pointers
// when they reach the cudaFree below.
gpuVectors_.resize( cpuVectors_.size() );
for(int i=0; i < NUM_TYPES; ++i )
{
auto & gpuVec = gpuVectors_[i];
auto & cpuVec = cpuVectors_[i];
cudaFree( gpuVec );
cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() );
cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice );
}
{% endif %}
}
private:
// Host-side index lists, indexed by Type.
std::vector<CpuIndexVector> cpuVectors_;
{% if target == 'gpu' -%}
// Raw device pointers, indexed by Type; owned by this object (freed in the destructor).
using GpuIndexVector = {{StructName}} *;
std::vector<GpuIndexVector> gpuVectors_;
{% endif %}
};
{{class_name}}( const shared_ptr<StructuredBlockForest> & blocks,
{{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}} )
: {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
......@@ -78,7 +110,10 @@ public:
indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
};
// Public entry points; `stream` defaults to 0 when the caller does not pass one.
// operator() processes all recorded boundary entries of the block,
// inner() / outer() process only the INNER / OUTER subset.
void operator() ( IBlock * block );
void operator() ( IBlock * block, cudaStream_t stream = 0 );
void inner( IBlock * block, cudaStream_t stream = 0 );
void outer( IBlock * block, cudaStream_t stream = 0 );
template<typename FlagField_T>
void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
......@@ -94,16 +129,23 @@ public:
FlagUID boundaryFlagUID, FlagUID domainFlagUID )
{
auto * indexVectors = block->getData< IndexVectors > ( indexVectorID );
auto & indexVector = indexVectors->cpu;
auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
auto * flagField = block->getData< FlagField_T > ( flagFieldID );
auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
auto domainFlag = flagField->getFlag(domainFlagUID);
indexVector.clear();
{% if target == 'gpu' %}
cudaFree( indexVectors->gpu );
{% endif %}
auto inner = flagField->xyzSize();
inner.expand( cell_idx_t(-1) );
indexVectorAll.clear();
indexVectorInner.clear();
indexVectorOuter.clear();
for( auto it = flagField->begin(); it != flagField->end(); ++it )
{
......@@ -111,22 +153,24 @@ public:
continue;
{%- for dirIdx, offset in stencil_info %}
{% if dim == 2 -%}
if ( isFlagSet( it.neighbor({{offset}}, 0), boundaryFlag ) )
indexVector.push_back({{StructName}}(it.x(), it.y(), {{dirIdx}} ) );
{%- elif dim == 3 -%}
if ( isFlagSet( it.neighbor({{offset}}), boundaryFlag ) )
indexVector.push_back({{StructName}}(it.x(), it.y(), it.z(), {{dirIdx}} ) );
{%- endif -%}
// NOTE(review): the dim == 2 / dim == 3 branches directly above append the extra
// ", 0" neighbor argument only for dim == 2; this combined form emits it for
// dim == 3 instead - confirm the condition is not inverted for 2D stencils.
if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) )
{
auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} );
// Every hit goes into ALL and additionally into exactly one of INNER / OUTER,
// depending on whether the cell lies inside the `inner` interval.
indexVectorAll.push_back( element );
if( inner.contains( it.x(), it.y(), it.z() ) )
indexVectorInner.push_back( element );
else
indexVectorOuter.push_back( element );
}
{% endfor %}
}
{% if target == 'gpu' %}
indexVectors->syncGPU();
{% endif %}
}
private:
void run( IBlock * block, IndexVectors::Type type, cudaStream_t stream );
BlockDataID indexVectorID;
{{kernel|generate_members(['indexVector', 'indexVectorSize'])|indent(4)}}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment