Commit 9d8a7264 authored by Martin Bauer
Browse files

Pass field information (shape, stride) as single elements instead of arrays

- previously, small (length < 5) arrays holding shape and stride information
  had to be memcpy'd to the GPU before every kernel call
- instead of passing this information as arrays, the individual elements are
  now passed as separate arguments
- this leads to more function arguments, but simplifies GPU kernel calls

-> changes in all backends required
parent 424621cf
......@@ -57,7 +57,7 @@ def create_boundary_class(boundary_object, lb_method, double_precision=True, tar
'class_name': boundary_object.name,
'StructName': struct_name,
'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype),
'kernel': KernelInfo(kernel, [], [], []),
'kernel': KernelInfo(kernel),
'stencil_info': stencil_info,
'dim': lb_method.dim,
'target': target,
......
......@@ -53,7 +53,7 @@ namespace {{namespace}} {
#endif
void {{class_name}}::run( IBlock * block, IndexVectors::Type type, cudaStream_t stream )
void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%})
{
auto * indexVectors = block->getData<IndexVectors>(indexVectorID);
......@@ -67,25 +67,25 @@ void {{class_name}}::run( IBlock * block, IndexVectors::Type type, cudaStream_t
if( indexVectorSize == 0)
return;
uint8_t * fd_indexVector = reinterpret_cast<uint8_t*>(pointer);
uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer);
{{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}}
{{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}}
}
void {{class_name}}::operator() ( IBlock * block, cudaStream_t stream )
void {{class_name}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
{
run( block, IndexVectors::ALL, stream );
run( block, IndexVectors::ALL{% if target == 'gpu'%}, stream {%endif%});
}
void {{class_name}}::inner( IBlock * block, cudaStream_t stream )
void {{class_name}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
{
run( block, IndexVectors::INNER, stream );
run( block, IndexVectors::INNER{% if target == 'gpu'%}, stream {%endif%} );
}
void {{class_name}}::outer( IBlock * block, cudaStream_t stream )
void {{class_name}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} )
{
run( block, IndexVectors::OUTER, stream );
run( block, IndexVectors::OUTER{% if target == 'gpu'%}, stream {%endif%} );
}
......
......@@ -75,7 +75,10 @@ public:
CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; }
{{StructName}} * pointerCpu(Type t) { return &(cpuVectors_[t][0]); }
{% if target == 'gpu' -%}
{{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; }
{% endif %}
void syncGPU()
{
......@@ -89,7 +92,7 @@ public:
cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() );
cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice );
}
{% endif %}
{%- endif %}
}
private:
......@@ -110,9 +113,9 @@ public:
indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}");
};
void operator() ( IBlock * block, cudaStream_t stream = 0 );
void inner( IBlock * block, cudaStream_t stream = 0 );
void outer( IBlock * block, cudaStream_t stream = 0 );
void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
template<typename FlagField_T>
......@@ -169,11 +172,11 @@ public:
}
private:
void run( IBlock * block, IndexVectors::Type type, cudaStream_t stream );
void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%});
BlockDataID indexVectorID;
{{kernel|generate_members(['indexVector', 'indexVectorSize'])|indent(4)}}
{{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}}
};
......
......@@ -51,7 +51,7 @@
#endif
{% set lmIgnores = ['pdfs', 'pdfs_tmp'] %}
{% set lmIgnores = ('pdfs', 'pdfs_tmp') %}
// Forward declarations
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment