diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py index 896a795cc2670179ca417d3ce05f88d05abb1cfe..44de5c3fb878cf1b34c7c311ce1cdc1da8daae8a 100644 --- a/python/pystencils_walberla/codegen.py +++ b/python/pystencils_walberla/codegen.py @@ -73,33 +73,22 @@ def generate_sweep(generation_context, class_name, assignments, env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) add_pystencils_filters_to_jinja_env(env) - if inner_outer_split is False: - jinja_context = { - 'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters), - 'namespace': namespace, - 'class_name': class_name, - 'target': create_kernel_params.get("target", "cpu"), - 'headers': get_headers(ast), - 'ghost_layers_to_include': ghost_layers_to_include - } - header = env.get_template("Sweep.tmpl.h").render(**jinja_context) - source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context) - else: - main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters) - representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter} - representative_field = sorted(representative_field)[0] - - jinja_context = { - 'kernel': main_kernel_info, - 'namespace': namespace, - 'class_name': class_name, - 'target': create_kernel_params.get("target", "cpu"), - 'field': representative_field, - 'headers': get_headers(ast), - 'ghost_layers_to_include': 0 - } - header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context) - source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context) + main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters) + representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter} + representative_field = sorted(representative_field)[0] + + jinja_context = { + 'kernel': main_kernel_info, + 'namespace': namespace, + 'class_name': class_name, + 'target': create_kernel_params.get("target", "cpu"), + 'field': representative_field, + 'headers': get_headers(ast), + 'ghost_layers_to_include': ghost_layers_to_include, + 'inner_outer_split': inner_outer_split + } + header = env.get_template("Sweep.tmpl.h").render(**jinja_context) + source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context) source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu" generation_context.write_file("{}.h".format(class_name), header) diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp index b26d9c6db71a12d941c329a3897b707067ae4892..9711be8cde6d7a94475c8172cb858c03631f60d4 100644 --- a/python/pystencils_walberla/templates/Sweep.tmpl.cpp +++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp @@ -83,6 +83,69 @@ void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> {{kernel|generate_swaps|indent(4)}} } +{%if inner_outer_split%} +void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) +{ + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + + CellInterval inner = {{field}}->xyzSize(); + inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2])); + + {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} + {{kernel|generate_call(stream='stream', cell_interval='inner')|indent(4)}} +} + + +void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} ) +{ + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + + if( layers_.size() == 0 ) + { + CellInterval ci; + + {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false); + layers_.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false); + layers_.push_back(ci); + + {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false); + ci.expand(Cell(0, 0, -outerWidth_[2])); + layers_.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false); + ci.expand(Cell(0, 0, -outerWidth_[2])); + layers_.push_back(ci); + + {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false); + ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); + layers_.push_back(ci); + {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false); + ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); + layers_.push_back(ci); + } + + {%if target is equalto 'gpu'%} + { + auto parallelSection_ = parallelStreams_.parallelSection( stream ); + for( auto & ci: layers_ ) + { + parallelSection_.run([&]( auto s ) { + {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} + {{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}} + }); + } + } + {% else %} + for( auto & ci: layers_ ) + { + {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(8) }} + {{kernel|generate_call(cell_interval='ci')|indent(8)}} + } + {% endif %} + + {{kernel|generate_swaps|indent(4)}} +} +{% endif %} } // namespace {{namespace}} } // namespace walberla diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.h b/python/pystencils_walberla/templates/Sweep.tmpl.h index b4db0f35bd6fc9ad5046c83bd99eca004291cc6f..9f0711a1e807553b2ca9cea2fa5dc5790c9c91af 100644 --- a/python/pystencils_walberla/templates/Sweep.tmpl.h +++ b/python/pystencils_walberla/templates/Sweep.tmpl.h @@ -24,6 +24,9 @@ #include "field/GhostLayerField.h" {%- elif target is equalto 'gpu' -%} #include "cuda/GPUField.h" +{% if inner_outer_split -%} +#include "cuda/ParallelStreams.h" +{%- endif %} {%- endif %} #include "field/SwapableCompare.h" #include "domain_decomposition/BlockDataID.h" @@ -42,6 +45,7 @@ #if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wunused-parameter" +# pragma GCC diagnostic ignored "-Wreorder" #endif namespace walberla { @@ -51,8 +55,8 @@ namespace {{namespace}} { class {{class_name}} { public: - {{class_name}}( {{kernel|generate_constructor_parameters}}) - : {{ kernel|generate_constructor_initializer_list }} + {{class_name}}( {{kernel|generate_constructor_parameters}} {%if inner_outer_split%}, const Cell & outerWidth=Cell(1, 1, 1){% endif %}) + : {{ kernel|generate_constructor_initializer_list }}{%if inner_outer_split%}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth){% endif %} {}; {{ kernel| generate_destructor(class_name) |indent(4) }} @@ -79,7 +83,29 @@ public: }; } +{% if inner_outer_split %} + + void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} ); + void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} ); + + void setOuterPriority(int priority ) { + {%if target is equalto 'gpu'%} + parallelStreams_.setStreamPriority(priority); + {%endif%} + } + {{kernel|generate_members|indent(4)}} + +private: + {%if target is equalto 'gpu'%} + cuda::ParallelStreams parallelStreams_; + {% endif %} + + Cell outerWidth_; + std::vector<CellInterval> layers_; + +{%- else -%} {{ kernel|generate_members|indent(4) }} +{% endif -%} }; diff --git a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp deleted file mode 100644 index 9f032a9ed79d54bf9c742fae64cf8a612bce2371..0000000000000000000000000000000000000000 --- a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp +++ /dev/null @@ -1,149 +0,0 @@ -//====================================================================================================================== -// -// This file is part of waLBerla. waLBerla is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// waLBerla is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \\file {{class_name}}.cpp -//! \\ingroup lbm -//! \\author lbmpy -//====================================================================================================================== - -#include <cmath> - -#include "core/DataTypes.h" -#include "core/Macros.h" -#include "{{class_name}}.h" -{% for header in headers %} -#include {{header}} -{% endfor %} - - -{% if target is equalto 'cpu' -%} -#define FUNC_PREFIX -{%- elif target is equalto 'gpu' -%} -#define FUNC_PREFIX __global__ -{%- endif %} - -#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wfloat-equal" -# pragma GCC diagnostic ignored "-Wshadow" -# pragma GCC diagnostic ignored "-Wconversion" -#endif - -using namespace std; - -namespace walberla { -namespace {{namespace}} { - -{{kernel|generate_definition(target)}} - -void {{class_name}}::operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) -{ - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} - {{kernel|generate_call(stream='stream')|indent(4)}} - {{kernel|generate_swaps|indent(4)}} -} - - -void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> & blocks, - const CellInterval & globalCellInterval, - cell_idx_t ghostLayers, - IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) -{ - CellInterval ci = globalCellInterval; - CellInterval blockBB = blocks->getBlockCellBB( *block); - blockBB.expand( ghostLayers ); - ci.intersect( blockBB ); - blocks->transformGlobalToBlockLocalCellInterval( ci, *block ); - if( ci.empty() ) - return; - - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} - {{kernel|generate_call(stream='stream', cell_interval='ci')|indent(4)}} - {{kernel|generate_swaps|indent(4)}} -} - - -void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) -{ - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - - CellInterval inner = {{field}}->xyzSize(); - inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2])); - - {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} - {{kernel|generate_call(stream='stream', cell_interval='inner')|indent(4)}} -} - - -void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} ) -{ - {{kernel|generate_block_data_to_field_extraction|indent(4)}} - - if( layers_.size() == 0 ) - { - CellInterval ci; - - {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false); - layers_.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false); - layers_.push_back(ci); - - {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false); - ci.expand(Cell(0, 0, -outerWidth_[2])); - layers_.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false); - ci.expand(Cell(0, 0, -outerWidth_[2])); - layers_.push_back(ci); - - {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false); - ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); - layers_.push_back(ci); - {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false); - ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2])); - layers_.push_back(ci); - } - - {%if target is equalto 'gpu'%} - { - auto parallelSection_ = parallelStreams_.parallelSection( stream ); - for( auto & ci: layers_ ) - { - parallelSection_.run([&]( auto s ) { - {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} - {{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}} - }); - } - } - {% else %} - for( auto & ci: layers_ ) - { - {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(8) }} - {{kernel|generate_call(cell_interval='ci')|indent(8)}} - } - {% endif %} - - {{kernel|generate_swaps|indent(4)}} -} - - -} // namespace {{namespace}} -} // namespace walberla - - -#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) -# pragma GCC diagnostic pop -#endif diff --git a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h deleted file mode 100644 index 6f6094d0e08dfe00e7873cfb204f636c81c0a04a..0000000000000000000000000000000000000000 --- a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h +++ /dev/null @@ -1,113 +0,0 @@ -//====================================================================================================================== -// -// This file is part of waLBerla. waLBerla is free software: you can -// redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of -// the License, or (at your option) any later version. -// -// waLBerla is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License along -// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. -// -//! \\file {{class_name}}.h -//! \\author pystencils -//====================================================================================================================== - -#pragma once -#include "core/DataTypes.h" - -{% if target is equalto 'cpu' -%} -#include "field/GhostLayerField.h" -{%- elif target is equalto 'gpu' -%} -#include "cuda/GPUField.h" -#include "cuda/ParallelStreams.h" -{%- endif %} -#include "field/SwapableCompare.h" -#include "domain_decomposition/BlockDataID.h" -#include "domain_decomposition/IBlock.h" -#include "domain_decomposition/StructuredBlockStorage.h" -#include <set> - -#ifdef __GNUC__ -#define RESTRICT __restrict__ -#elif _MSC_VER -#define RESTRICT __restrict -#else -#define RESTRICT -#endif - -#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-parameter" -# pragma GCC diagnostic ignored "-Wreorder" -#endif - -namespace walberla { -namespace {{namespace}} { - - -class {{class_name}} -{ -public: - {{class_name}}( {{kernel|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1)) - : {{ kernel|generate_constructor_initializer_list }}{% if kernel|generate_constructor_initializer_list|length %},{% endif %} outerWidth_(outerWidth) - {}; - - {{ kernel| generate_destructor(class_name) |indent(4) }} - - - void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} ); - - void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks, - const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block - {%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %}); - - - - static std::function<void (IBlock*)> getSweep(const shared_ptr<{{class_name}}> & kernel) { - return [kernel](IBlock * b) { (*kernel)(b); }; - } - - static std::function<void (IBlock*{%if target is equalto 'gpu'%} , cudaStream_t {% endif %})> - getSweepOnCellInterval(const shared_ptr<{{class_name}}> & kernel, - const shared_ptr<StructuredBlockStorage> & blocks, - const CellInterval & globalCellInterval, - cell_idx_t ghostLayers=1 ) - { - return [kernel, blocks, globalCellInterval, ghostLayers] (IBlock * b{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %}) { - kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b{%if target is equalto 'gpu'%}, stream {% endif %}); - }; - } - - - void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} ); - void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = nullptr{% endif %} ); - - void setOuterPriority(int priority ) { - {%if target is equalto 'gpu'%} - parallelStreams_.setStreamPriority(priority); - {%endif%} - } - {{kernel|generate_members|indent(4)}} - -private: - {%if target is equalto 'gpu'%} - cuda::ParallelStreams parallelStreams_; - {% endif %} - - Cell outerWidth_; - std::vector<CellInterval> layers_; -}; - - -} // namespace {{namespace}} -} // namespace walberla - - -#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) -# pragma GCC diagnostic pop -#endif