diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6b327d488b816a733960ce7f47125fdbfa181075..360b2d122fb4d4d688c18e94f33f05d6f49f4982 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -376,8 +376,7 @@ intel_19_hybrid: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_hybrid_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -394,8 +393,7 @@ intel_19_serial_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_serial_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -412,8 +410,7 @@ intel_19_mpionly_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_mpionly_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -430,8 +427,7 @@ intel_19_hybrid_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_hybrid_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -447,13 +443,9 @@ intel_19_hybrid_dbg: intel_19_hybrid_dbg_sp: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19 - before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git variables: <<: *build_hybrid_dbg_sp_variables WALBERLA_BUILD_WITH_CUDA: "OFF" - WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_ENABLE_GUI: 0 except: variables: @@ -871,8 +863,7 @@ gcc_9_hybrid: image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 stage: pretest before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_hybrid_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -888,8 +879,7 @@ gcc_9_serial_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_serial_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -905,8 +895,7 @@ gcc_9_mpionly_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_mpionly_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -922,8 +911,7 @@ gcc_9_hybrid_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: 
*build_hybrid_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -938,13 +926,9 @@ gcc_9_hybrid_dbg: gcc_9_hybrid_dbg_sp: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:9 - before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git variables: <<: *build_hybrid_dbg_sp_variables WALBERLA_BUILD_WITH_CUDA: "OFF" - WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_ENABLE_GUI: 0 except: variables: @@ -1444,8 +1428,7 @@ clang_9.0_hybrid: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_hybrid_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -1461,8 +1444,7 @@ clang_9.0_serial_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_serial_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -1478,8 +1460,7 @@ clang_9.0_mpionly_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_mpionly_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -1495,8 +1476,7 @@ clang_9.0_hybrid_dbg: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git + - pip3 install lbmpy variables: <<: *build_hybrid_dbg_variables WALBERLA_BUILD_WITH_CUDA: "OFF" @@ -1512,13 +1492,9 @@ clang_9.0_hybrid_dbg_sp: <<: *build_definition image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:9.0 stage: pretest - before_script: - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy.git - - pip3 install git+https://i10git.cs.fau.de/pycodegen/lbmpy_walberla.git variables: <<: *build_hybrid_dbg_sp_variables WALBERLA_BUILD_WITH_CUDA: "OFF" - WALBERLA_BUILD_WITH_CODEGEN: "ON" WALBERLA_ENABLE_GUI: 0 except: variables: diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e0d245154ced30d96aa407b0c67da474f7604e8..639b3c0ef98949cf36153c2e37ae5ab627f371fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -560,7 +560,7 @@ endif ( ) ############################################################################################################################# if ( WALBERLA_BUILD_WITH_CODEGEN ) find_package( PythonInterp 3 QUIET REQUIRED) - execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import pystencils_walberla; from pystencils.include import get_pystencils_include_path; print(get_pystencils_include_path())" + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from pystencils.include import get_pystencils_include_path; print(get_pystencils_include_path())" RESULT_VARIABLE PYTHON_RET_CODE OUTPUT_VARIABLE PYSTENCILS_INCLUDE_PATH) if(NOT PYTHON_RET_CODE EQUAL 0) diff --git a/cmake/waLBerlaHelperFunctions.cmake b/cmake/waLBerlaHelperFunctions.cmake index 7a49ab385cc9ff12ef819b44a8db25deb51d76eb..21c9b01b517a15eafd1f6fbf6f855815e55563a4 100644 --- a/cmake/waLBerlaHelperFunctions.cmake +++ 
b/cmake/waLBerlaHelperFunctions.cmake @@ -64,10 +64,11 @@ function( handle_python_codegen sourceFilesOut generatedSourceFilesOut generator string(REPLACE "\"" "\\\"" pythonParameters ${pythonParameters}) # even one more quoting level required string(REPLACE "\n" "" pythonParameters ${pythonParameters}) # remove newline characters + set( WALBERLA_PYTHON_DIR ${walberla_SOURCE_DIR}/python) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${codegenCfg}") add_custom_command(OUTPUT ${generatedWithAbsolutePath} DEPENDS ${sourceFile} - COMMAND ${PYTHON_EXECUTABLE} ${sourceFile} ${pythonParameters} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${WALBERLA_PYTHON_DIR} ${PYTHON_EXECUTABLE} ${sourceFile} ${pythonParameters} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${codegenCfg}") endif() else() diff --git a/python/lbmpy_walberla/__init__.py b/python/lbmpy_walberla/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b022ab058e6a83a05aafa5fa586fb59d8551fd --- /dev/null +++ b/python/lbmpy_walberla/__init__.py @@ -0,0 +1,4 @@ +from .boundary import generate_boundary +from .walberla_lbm_generation import RefinementScaling, generate_lattice_model + +__all__ = ['generate_lattice_model', 'RefinementScaling', 'generate_boundary'] diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..32176989f9f36adbfe4a155e242a9a524200479d --- /dev/null +++ b/python/lbmpy_walberla/boundary.py @@ -0,0 +1,92 @@ +import numpy as np +from jinja2 import Environment, PackageLoader, StrictUndefined + +from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel +from lbmpy_walberla.walberla_lbm_generation import KernelInfo +from pystencils import Field, FieldType +from pystencils.boundaries.createindexlist import ( + boundary_index_array_coordinate_names, direction_member_name, + numpy_data_type_for_boundary_object) +from pystencils.data_types import TypedSymbol, create_type +from pystencils_walberla.codegen import default_create_kernel_parameters +from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env + + +def generate_boundary(generation_context, class_name, boundary_object, lb_method, **create_kernel_params): + struct_name = "IndexInfo" + boundary_object.name = class_name + + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) + target = create_kernel_params['target'] + + index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, lb_method.dim) + + pdf_field = Field.create_generic('pdfs', lb_method.dim, + np.float64 if generation_context.double_accuracy else np.float32, + index_dimensions=1, layout='fzyx', index_shape=[len(lb_method.stencil)]) + + index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0], + shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1)) + + kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target, + openmp=generation_context.openmp) + kernel.function_name = "boundary_" + boundary_object.name + + # waLBerla is a 3D framework. 
Therefore, a zero for the z index has to be added if we work in 2D + if lb_method.dim == 2: + stencil = () + for d in lb_method.stencil: + d = d + (0,) + stencil = stencil + (d,) + else: + stencil = lb_method.stencil + + stencil_info = [(i, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)] + + context = { + 'class_name': boundary_object.name, + 'StructName': struct_name, + 'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype), + 'kernel': KernelInfo(kernel), + 'stencil_info': stencil_info, + 'dim': lb_method.dim, + 'target': target, + 'namespace': 'lbm', + } + + env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + + header = env.get_template('Boundary.tmpl.h').render(**context) + source = env.get_template('Boundary.tmpl.cpp').render(**context) + + source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu" + generation_context.write_file("{}.h".format(class_name), header) + generation_context.write_file("{}.{}".format(class_name, source_extension), source) + + +def struct_from_numpy_dtype(struct_name, numpy_dtype): + result = "struct %s { \n" % (struct_name,) + + equality_compare = [] + constructor_params = [] + constructor_initializer_list = [] + for name, (sub_type, offset) in numpy_dtype.fields.items(): + pystencils_type = create_type(sub_type) + result += " %s %s;\n" % (pystencils_type, name) + if name in boundary_index_array_coordinate_names or name == direction_member_name: + constructor_params.append("%s %s_" % (pystencils_type, name)) + constructor_initializer_list.append("%s(%s_)" % (name, name)) + else: + constructor_initializer_list.append("%s()" % name) + if pystencils_type.is_float(): + equality_compare.append("floatIsEqual(%s, o.%s)" % (name, name)) + else: + equality_compare.append("%s == o.%s" % (name, name)) + + result += " %s(%s) : %s {}\n" % \ + (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list)) + result += " bool operator==(const %s & o) const {\n return %s;\n }\n" % \ + (struct_name, " && ".join(equality_compare)) + result += "};\n" + return result diff --git a/python/lbmpy_walberla/sparse.py b/python/lbmpy_walberla/sparse.py new file mode 100644 index 0000000000000000000000000000000000000000..95e45f64efe9fbbcfae235f6c4ce3d65dc300269 --- /dev/null +++ b/python/lbmpy_walberla/sparse.py @@ -0,0 +1,49 @@ +import numpy as np + +from lbmpy.sparse import ( + create_lb_update_rule_sparse, create_macroscopic_value_getter_sparse, + create_macroscopic_value_setter_sparse, create_symbolic_list) +from pystencils import TypedSymbol, create_kernel + + +class ListLbGenerator: + def __init__(self, collision_rule, pdf_type=np.float64, index_type=np.uint32, layout='SoA'): + self.collision_rule = collision_rule + + num_cells = TypedSymbol('num_cells', np.uint64) + method = self.collision_rule.method + q = len(method.stencil) + + self.num_cells = num_cells + self.src = create_symbolic_list('src', num_cells, q, pdf_type, layout) + self.dst = create_symbolic_list('dst', num_cells, q, pdf_type, layout) + self.idx = create_symbolic_list('idx', num_cells, q, index_type, layout) + + def kernel(self, kernel_type='stream_pull_collide'): + ac = create_lb_update_rule_sparse(self.collision_rule, self.src, self.dst, self.idx, kernel_type) + return create_kernel(ac) + + def getter_ast(self, density=True, velocity=True): + assert density or velocity + + method = self.collision_rule.method + output_descriptor = {} + if density: 
+ output_descriptor['density'] = create_symbolic_list('rho', self.src.spatial_shape[0], 1, self.src.dtype) + if velocity: + output_descriptor['velocity'] = create_symbolic_list('vel', self.src.spatial_shape[0], + method.dim, self.src.dtype) + + ac = create_macroscopic_value_getter_sparse(method, self.src, output_descriptor) + return create_kernel(ac) + + def setter_ast(self, density=True, velocity=True): + method = self.collision_rule.method + if density is True: + density = create_symbolic_list('rho', self.src.spatial_shape[0], 1, self.src.dtype).center + + if velocity is True: + velocity = create_symbolic_list('vel', self.src.spatial_shape[0], method.dim, self.src.dtype).center_vector + + ac = create_macroscopic_value_setter_sparse(method, self.src, density, velocity) + return create_kernel(ac) diff --git a/python/lbmpy_walberla/templates/Boundary.tmpl.cpp b/python/lbmpy_walberla/templates/Boundary.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d4b3333b14bbd2939964a90723ad60b1ed465f6e --- /dev/null +++ b/python/lbmpy_walberla/templates/Boundary.tmpl.cpp @@ -0,0 +1,104 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.cpp +//! \\ingroup lbm +//! 
\\author lbmpy +//====================================================================================================================== + +#include <cmath> + +#include "core/DataTypes.h" +#include "core/Macros.h" +#include "{{class_name}}.h" +{% if target == 'gpu' -%} +#include "cuda/ErrorChecking.h" +{%- endif %} + + +{% if target == 'cpu' -%} +#define FUNC_PREFIX +{%- elif target == 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wconversion" +#endif + +#ifdef __CUDACC__ +#pragma push +#pragma diag_suppress = declared_but_not_referenced +#endif + + +{{kernel|generate_definition}} + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#ifdef __CUDACC__ +#pragma pop +#endif + + +void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%}) +{ + auto * indexVectors = block->getData<IndexVectors>(indexVectorID); + int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() ); + if( indexVectorSize == 0) + return; + + {% if target == 'gpu' -%} + auto pointer = indexVectors->pointerGpu(type); + {% else %} + auto pointer = indexVectors->pointerCpu(type); + {% endif %} + + uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer); + + {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}} + {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}} +} + +void {{class_name}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::ALL{% if target == 'gpu'%}, stream {%endif%}); +} + +void {{class_name}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::INNER{% if target == 'gpu'%}, stream {%endif%} ); +} + +void {{class_name}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +{ + run( block, IndexVectors::OUTER{% if target == 'gpu'%}, stream {%endif%} ); +} + + +} // namespace {{namespace}} +} // namespace walberla + + diff --git a/python/lbmpy_walberla/templates/Boundary.tmpl.h b/python/lbmpy_walberla/templates/Boundary.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..97dabe87a52bd6b7eb8b3c8116fd0392988528f5 --- /dev/null +++ b/python/lbmpy_walberla/templates/Boundary.tmpl.h @@ -0,0 +1,188 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.h +//! 
\\author pystencils +//====================================================================================================================== + + +#include "core/DataTypes.h" + +{% if target is equalto 'cpu' -%} +#include "field/GhostLayerField.h" +{%- elif target is equalto 'gpu' -%} +#include "cuda/GPUField.h" +{%- endif %} +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" +#include "blockforest/StructuredBlockForest.h" +#include "field/FlagField.h" + +#include <set> +#include <vector> + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace {{namespace}} { + + +class {{class_name}} +{ +public: + {{StructDeclaration|indent(4)}} + + + class IndexVectors + { + public: + using CpuIndexVector = std::vector<{{StructName}}>; + + enum Type { + ALL = 0, + INNER = 1, + OUTER = 2, + NUM_TYPES = 3 + }; + + IndexVectors() : cpuVectors_(NUM_TYPES) {} + bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; } + + {% if target == 'gpu' -%} + ~IndexVectors() { + for( auto & gpuVec: gpuVectors_) + cudaFree( gpuVec ); + } + {% endif %} + + CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; } + {{StructName}} * pointerCpu(Type t) { return &(cpuVectors_[t][0]); } + + {% if target == 'gpu' -%} + {{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; } + {% endif %} + + void syncGPU() + { + {% if target == 'gpu' -%} + gpuVectors_.resize( cpuVectors_.size() ); + for(size_t i=0; i < size_t(NUM_TYPES); ++i ) + { + auto & gpuVec = gpuVectors_[i]; + auto & cpuVec = cpuVectors_[i]; + cudaFree( gpuVec ); + cudaMalloc( &gpuVec, sizeof({{StructName}}) * cpuVec.size() ); + cudaMemcpy( gpuVec, &cpuVec[0], sizeof({{StructName}}) * cpuVec.size(), cudaMemcpyHostToDevice ); + } + {%- endif %} + } + + private: + std::vector<CpuIndexVector> cpuVectors_; + + {% if target == 'gpu' -%} + using GpuIndexVector = {{StructName}} *; + std::vector<GpuIndexVector> gpuVectors_; + {% endif %} + }; + + + {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks, + {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}} ) + : {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} + { + auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }; + indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}"); + }; + + void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + + + template<typename FlagField_T> + void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID) + { + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID ); + } + + + template<typename FlagField_T> + void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID, + FlagUID boundaryFlagUID, FlagUID domainFlagUID ) + { + auto * indexVectors = block->getData< IndexVectors > ( indexVectorID ); + auto & indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); + auto & indexVectorInner = 
indexVectors->indexVector(IndexVectors::INNER); + auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); + + + auto * flagField = block->getData< FlagField_T > ( flagFieldID ); + + if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) + return; + + auto boundaryFlag = flagField->getFlag(boundaryFlagUID); + auto domainFlag = flagField->getFlag(domainFlagUID); + + auto inner = flagField->xyzSize(); + inner.expand( cell_idx_t(-1) ); + + + indexVectorAll.clear(); + indexVectorInner.clear(); + indexVectorOuter.clear(); + + for( auto it = flagField->begin(); it != flagField->end(); ++it ) + { + if( ! isFlagSet(it, domainFlag) ) + continue; + + {%- for dirIdx, offset in stencil_info %} + if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) ) + { + auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} ); + indexVectorAll.push_back( element ); + if( inner.contains( it.x(), it.y(), it.z() ) ) + indexVectorInner.push_back( element ); + else + indexVectorOuter.push_back( element ); + } + {% endfor %} + } + + indexVectors->syncGPU(); + } + +private: + void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + + BlockDataID indexVectorID; +public: + {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} +}; + + + +} // namespace {{namespace}} +} // namespace walberla diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b2065cc7600e4d230d7d21f801e8e1044bb7701 --- /dev/null +++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.cpp @@ -0,0 +1,127 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! 
\\author Martin Bauer <martin.bauer@fau.de> +//====================================================================================================================== + +#include <cmath> + +#include "core/DataTypes.h" +#include "core/Macros.h" +#include "lbm/field/PdfField.h" +#include "lbm/sweeps/Streaming.h" +#include "{{class_name}}.h" + +#ifdef _MSC_VER +# pragma warning( disable : 4458 ) +#endif + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wfloat-equal" +# pragma GCC diagnostic ignored "-Wshadow" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wunused-variable" +# pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +{% for header in headers %} +#include {{header}} +{% endfor %} + + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + +{{stream_collide_kernel|generate_definition(target)}} +{{collide_kernel|generate_definition(target)}} +{{stream_kernel|generate_definition(target)}} + + +const real_t {{class_name}}::w[{{Q}}] = { {{weights}} }; +const real_t {{class_name}}::wInv[{{Q}}] = { {{inverse_weights}} }; + +void {{class_name}}::Sweep::streamCollide( IBlock * block, const uint_t numberOfGhostLayersToInclude ) +{ + {{stream_collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}} + + auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + WALBERLA_ASSERT_EQUAL( *(lm.blockId), block->getId() ); + + {{stream_collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }} + {{stream_collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} + {{stream_collide_kernel|generate_swaps|indent(4)}} +} + +void {{class_name}}::Sweep::collide( IBlock * block, const uint_t numberOfGhostLayersToInclude ) +{ + {{collide_kernel|generate_block_data_to_field_extraction(parameters=['pdfs'])|indent(4)}} + + auto & lm = dynamic_cast< lbm::PdfField<{{class_name}}> * > (pdfs)->latticeModel(); + WALBERLA_ASSERT_EQUAL( *(lm.blockId), block->getId() ); + + {{collide_kernel|generate_refs_for_kernel_parameters(prefix='lm.', parameters_to_ignore=['pdfs', 'pdfs_tmp'])|indent(4) }} + {{collide_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} +} + + +void {{class_name}}::Sweep::stream( IBlock * block, const uint_t numberOfGhostLayersToInclude ) +{ + {{stream_kernel|generate_block_data_to_field_extraction(parameters=['pdfs', 'pdfs_tmp'])|indent(4)}} + + {{stream_kernel|generate_call('cell_idx_c(numberOfGhostLayersToInclude)')|indent(4)}} + + {{stream_kernel|generate_swaps|indent(4)}} +} + + +} // namespace {{namespace}} +} // namespace walberla + + + + +// Buffer Packing + +namespace walberla { +namespace mpi { + +mpi::SendBuffer & operator<< (mpi::SendBuffer & buf, const ::walberla::{{namespace}}::{{class_name}} & lm) +{ + buf << lm.currentLevel; + return buf; +} + +mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::{{namespace}}::{{class_name}} & lm) +{ + buf >> lm.currentLevel; + return buf; +} + + +} // namespace mpi +} // namespace walberla + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic pop +#endif \ No newline at end of file 
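For context, a lattice model class produced from these templates is consumed on the C++ application side roughly as in the sketch below. This is an illustrative sketch, not part of the patch: the class name SrtWithForceFieldModel is taken from the test cases further down, the objects blocks, timeloop, forceFieldId, omega and the initial velocity are assumed to be set up by the surrounding application, and the helper signatures (lbm::addPdfFieldToStorage, Sweep) may differ slightly between waLBerla versions.

    // illustrative usage sketch under the assumptions stated above
    #include "lbm/field/AddToStorage.h"
    #include "lbm/field/PdfField.h"
    #include "SrtWithForceFieldModel.h"   // written by generate_lattice_model(...)

    using namespace walberla;
    using LatticeModel_T = lbm::SrtWithForceFieldModel;
    using PdfField_T     = lbm::PdfField< LatticeModel_T >;

    // constructor parameters of a generated model are ordered alphabetically
    LatticeModel_T latticeModel( forceFieldId, omega );

    // PDF field that carries the generated lattice model as template argument
    BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel,
                                                        Vector3< real_t >( real_t(0) ), real_t(1) );

    // the nested Sweep functor runs the fused stream-pull-collide kernel block by block
    timeloop.add() << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" );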
diff --git a/python/lbmpy_walberla/templates/LatticeModel.tmpl.h b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..be160e99e89d596f86511a97db314e35aeaaca18
--- /dev/null
+++ b/python/lbmpy_walberla/templates/LatticeModel.tmpl.h
@@ -0,0 +1,554 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\author Martin Bauer <martin.bauer@fau.de>
+//
+//======================================================================================================================
+
+
+#include "core/DataTypes.h"
+#include "core/logging/Logging.h"
+
+#include "field/GhostLayerField.h"
+#include "field/SwapableCompare.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/{{stencil_name}}.h"
+
+#include "lbm/lattice_model/EquilibriumDistribution.h"
+#include "lbm/field/Density.h"
+#include "lbm/field/DensityAndMomentumDensity.h"
+#include "lbm/field/DensityAndVelocity.h"
+#include "lbm/field/PressureTensor.h"
+#include "lbm/field/ShearRate.h"
+
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_GNU
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+#ifdef WALBERLA_CXX_COMPILER_IS_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+{% set lmIgnores = ('pdfs', 'pdfs_tmp') %}
+{% set lmOffsets = ('block_offset_0', 'block_offset_1', 'block_offset_2') %}
+
+
+// Forward declarations
+namespace walberla{
+namespace {{namespace}} {
+    class {{class_name}};
+}}
+namespace walberla {
+namespace mpi {
+    mpi::SendBuffer & operator<< (mpi::SendBuffer & buf, const ::walberla::{{namespace}}::{{class_name}} & lm);
+    mpi::RecvBuffer & operator>> (mpi::RecvBuffer & buf, ::walberla::{{namespace}}::{{class_name}} & lm);
+}}
+
+
+
+
+namespace walberla {
+namespace {{namespace}} {
+
+
+/**
+{{class_name}} was generated with lbmpy. Do not edit this file directly. Instead modify {{class_name}}.py.
+For details see documentation of lbmpy.
+
+Usage:
+    - Create an instance of this lattice model class: the constructor parameters vary depending on the configured
+      lattice model. A model with constant force needs a single force vector, while a model with variable forces needs
+      a force field. All constructor parameters are ordered alphabetically.
+    - Create a PDFField with the lattice model as template argument to store the particle distribution functions.
+      Use the PDFField to get and modify macroscopic values.
+ - The internal class {{class_name}}::Sweep is a functor to execute one LB time step. + Stream, collide steps can be executed separately, or together in an optimized stream-pull-collide scheme + +*/ +class {{class_name}} +{ + +public: + typedef stencil::{{stencil_name}} Stencil; + typedef stencil::{{stencil_name}} CommunicationStencil; + static const real_t w[{{Q}}]; + static const real_t wInv[{{Q}}]; + + static const bool compressible = {% if compressible %}true{% else %}false{% endif %}; + + class Sweep + { + public: + Sweep( BlockDataID _pdfsID ) : pdfsID(_pdfsID) {}; + + //void stream ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) ); + void collide ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) ); + void streamCollide( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) ); + void stream ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) ); + + void operator() ( IBlock * const block, const uint_t numberOfGhostLayersToInclude = uint_t(0) ) + { + streamCollide( block, numberOfGhostLayersToInclude ); + } + + private: + {{stream_collide_kernel|generate_members(only_fields=True)|indent(8)}} + }; + + {{class_name}}( {{stream_collide_kernel|generate_constructor_parameters(lmIgnores+lmOffsets) }} ) + : {{ stream_collide_kernel|generate_constructor_initializer_list(lmIgnores+lmOffsets) }}, currentLevel(0) + {}; + + void configure( IBlock & block, StructuredBlockStorage & storage ) { configureBlock( &block, &storage ); } + + // Parameters: + {{stream_collide_kernel|generate_members(lmIgnores)|indent(4)}} + +private: + void configureBlock(IBlock * block, StructuredBlockStorage * storage) + { + {{stream_collide_kernel|generate_block_data_to_field_extraction(lmIgnores+lmOffsets, no_declarations=True)|indent(8)}} + {% if need_block_offsets[0] -%} + block_offset_0 = uint32_c(storage->getBlockCellBB(*block).xMin()); + {% endif -%} + {%- if need_block_offsets[1] -%} + block_offset_1 = uint32_c(storage->getBlockCellBB(*block).yMin()); + {% endif -%} + {%- if need_block_offsets[2] -%} + block_offset_2 = uint32_c(storage->getBlockCellBB(*block).zMin()); + {% endif %} + blockId = &block->getId(); + + {% if refinement_scaling_info -%} + const uint_t targetLevel = block->getBlockStorage().getLevel(*block); + + if( targetLevel != currentLevel ) + { + real_t level_scale_factor(1); + if( currentLevel < targetLevel ) + level_scale_factor = real_c( uint_t(1) << ( targetLevel - currentLevel ) ); + else // currentLevel > targetLevel + level_scale_factor = real_t(1) / real_c( uint_t(1) << ( currentLevel - targetLevel ) ); + + {% for scalingType, name, expression in refinement_scaling_info -%} + {% if scalingType == 'normal' %} + {{name}} = {{expression}}; + {% elif scalingType in ('field_with_f', 'field_xyz') %} + auto it = {{name}}->{% if scalingType == 'field_with_f'%} beginWithGhostLayer(){% else %}beginWithGhostLayerXYZ(){% endif %}; + for( ; it != {{name}}->end(); ++it ) + { + auto x = it.x(); + auto y = it.y(); + auto z = it.z(); + {% if scalingType == 'field_with_f' -%} + auto f = it.f(); + {% endif -%} + *it = {{expression}}; + } + {% endif -%} + {% endfor -%} + + currentLevel = targetLevel; + } + {% endif %} + } + + // Updated by configureBlock: + {{stream_collide_kernel|generate_block_data_to_field_extraction(lmIgnores, declarations_only=True)|indent(4)}} + const IBlockID * blockId; + uint_t currentLevel; + + // Backend classes can access private members: + friend class {{class_name}}::Sweep; + 
template<class LM, class Enable> friend class EquilibriumDistribution; + template<class LM, class Enable> friend struct Equilibrium; + template<class LM, class Enable> friend struct internal::AdaptVelocityToForce; + template<class LM, class Enable> friend struct Density; + template<class LM> friend struct DensityAndVelocity; + template<class LM, class Enable> friend struct DensityAndMomentumDensity; + template<class LM, class Enable> friend struct MomentumDensity; + template<class LM, class It, class Enable> friend struct DensityAndVelocityRange; + + friend mpi::SendBuffer & ::walberla::mpi::operator<< (mpi::SendBuffer & , const {{class_name}} & ); + friend mpi::RecvBuffer & ::walberla::mpi::operator>> (mpi::RecvBuffer & , {{class_name}} & ); + +}; + + + + +//====================================================================================================================== +// +// Implementation of macroscopic value backend +// +//====================================================================================================================== + + + +template<> +class EquilibriumDistribution< {{class_name}}, void> +{ +public: + typedef typename {{class_name}}::Stencil Stencil; + + static real_t get( const stencil::Direction direction, + const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), + real_t rho = real_t(1.0) ) + { + {% if not compressible %} + rho -= real_t(1.0); + {% endif %} + {{equilibrium_from_direction}} + } + + static real_t getSymmetricPart( const stencil::Direction direction, + const Vector3<real_t> & u = Vector3< real_t >(real_t(0.0)), + real_t rho = real_t(1.0) ) + { + {% if not compressible %} + rho -= real_t(1.0); + {% endif %} + {{symmetric_equilibrium_from_direction}} + } + + static real_t getAsymmetricPart( const stencil::Direction direction, + const Vector3< real_t > & u = Vector3<real_t>( real_t(0.0) ), + real_t rho = real_t(1.0) ) + { + {% if not compressible %} + rho -= real_t(1.0); + {% endif %} + {{asymmetric_equilibrium_from_direction}} + } + + static std::vector< real_t > get( const Vector3< real_t > & u = Vector3<real_t>( real_t(0.0) ), + real_t rho = real_t(1.0) ) + { + {% if not compressible %} + rho -= real_t(1.0); + {% endif %} + + std::vector< real_t > equilibrium( Stencil::Size ); + for( auto d = Stencil::begin(); d != Stencil::end(); ++d ) + { + equilibrium[d.toIdx()] = get(*d, u, rho); + } + return equilibrium; + } +}; + + +namespace internal { + +template<> +struct AdaptVelocityToForce<{{class_name}}, void> +{ + template< typename FieldPtrOrIterator > + static Vector3<real_t> get( FieldPtrOrIterator & it, const {{class_name}} & lm, + const Vector3< real_t > & velocity, const real_t rho ) + { + auto x = it.x(); + auto y = it.y(); + auto z = it.z(); + {% if macroscopic_velocity_shift %} + return velocity - Vector3<real_t>({{macroscopic_velocity_shift | join(",") }} {% if D == 2 %}, real_t(0.0) {%endif %} ); + {% else %} + return velocity; + {% endif %} + } + + static Vector3<real_t> get( const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const {{class_name}} & lm, + const Vector3< real_t > & velocity, const real_t rho ) + { + {% if macroscopic_velocity_shift %} + + return velocity - Vector3<real_t>({{macroscopic_velocity_shift | join(",") }} {% if D == 2 %}, real_t(0.0) {%endif %} ); + {% else %} + return velocity; + {% endif %} + } +}; +} // namespace internal + + + +template<> +struct Equilibrium< {{class_name}}, void > +{ + + template< typename FieldPtrOrIterator > + static void set( FieldPtrOrIterator & it, + const 
Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) ) + { + {%if not compressible %} + rho -= real_t(1.0); + {%endif %} + + {% for eqTerm in equilibrium -%} + it[{{loop.index0 }}] = {{eqTerm}}; + {% endfor -%} + } + + template< typename PdfField_T > + static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, + const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), real_t rho = real_t(1.0) ) + { + {%if not compressible %} + rho -= real_t(1.0); + {%endif %} + + real_t & xyz0 = pdf(x,y,z,0); + {% for eqTerm in equilibrium -%} + pdf.getF( &xyz0, {{loop.index0 }})= {{eqTerm}}; + {% endfor -%} + } +}; + + +template<> +struct Density<{{class_name}}, void> +{ + template< typename FieldPtrOrIterator > + static inline real_t get( const {{class_name}} & , const FieldPtrOrIterator & it ) + { + {% for i in range(Q) -%} + const real_t f_{{i}} = it[{{i}}]; + {% endfor -%} + {{density_getters | indent(8)}} + return rho; + } + + template< typename PdfField_T > + static inline real_t get( const {{class_name}} & , + const PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) + { + const real_t & xyz0 = pdf(x,y,z,0); + {% for i in range(Q) -%} + const real_t f_{{i}} = pdf.getF( &xyz0, {{i}}); + {% endfor -%} + {{density_getters | indent(8)}} + return rho; + } +}; + + +template<> +struct DensityAndVelocity<{{class_name}}> +{ + template< typename FieldPtrOrIterator > + static void set( FieldPtrOrIterator & it, const {{class_name}} & lm, + const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) ) + { + auto x = it.x(); + auto y = it.y(); + auto z = it.z(); + + {{density_velocity_setter_macroscopic_values | indent(8)}} + {% if D == 2 -%} + const real_t u_2(0.0); + {% endif %} + + Equilibrium<{{class_name}}>::set(it, Vector3<real_t>(u_0, u_1, u_2), rho{%if not compressible %} + real_t(1) {%endif%}); + } + + template< typename PdfField_T > + static void set( PdfField_T & pdf, const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, const {{class_name}} & lm, + const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) ) + { + {{density_velocity_setter_macroscopic_values | indent(8)}} + {% if D == 2 -%} + const real_t u_2(0.0); + {% endif %} + + Equilibrium<{{class_name}}>::set(pdf, x, y, z, Vector3<real_t>(u_0, u_1, u_2), rho {%if not compressible %} + real_t(1) {%endif%}); + } +}; + + +template<typename FieldIteratorXYZ > +struct DensityAndVelocityRange<{{class_name}}, FieldIteratorXYZ> +{ + + static void set( FieldIteratorXYZ & begin, const FieldIteratorXYZ & end, const {{class_name}} & lm, + const Vector3< real_t > & u = Vector3< real_t >( real_t(0.0) ), const real_t rho_in = real_t(1.0) ) + { + for( auto cellIt = begin; cellIt != end; ++cellIt ) + { + const auto x = cellIt.x(); + const auto y = cellIt.y(); + const auto z = cellIt.z(); + {{density_velocity_setter_macroscopic_values | indent(12)}} + {% if D == 2 -%} + const real_t u_2(0.0); + {% endif %} + + Equilibrium<{{class_name}}>::set(cellIt, Vector3<real_t>(u_0, u_1, u_2), rho{%if not compressible %} + real_t(1) {%endif%}); + } + } +}; + + + +template<> +struct DensityAndMomentumDensity<{{class_name}}> +{ + template< typename FieldPtrOrIterator > + static real_t get( Vector3< real_t > & momentumDensity, const {{class_name}} & lm, + const FieldPtrOrIterator & it ) + { + const auto x = it.x(); + const auto y = it.y(); + const auto z = it.z(); + + {% for i in range(Q) -%} + 
const real_t f_{{i}} = it[{{i}}]; + {% endfor -%} + + {{momentum_density_getter | indent(8) }} + {% for i in range(D) -%} + momentumDensity[{{i}}] = md_{{i}}; + {% endfor %} + return rho; + } + + template< typename PdfField_T > + static real_t get( Vector3< real_t > & momentumDensity, const {{class_name}} & lm, const PdfField_T & pdf, + const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) + { + const real_t & xyz0 = pdf(x,y,z,0); + {% for i in range(Q) -%} + const real_t f_{{i}} = pdf.getF( &xyz0, {{i}}); + {% endfor -%} + + {{momentum_density_getter | indent(8) }} + {% for i in range(D) -%} + momentumDensity[{{i}}] = md_{{i}}; + {% endfor %} + return rho; + } +}; + + +template<> +struct MomentumDensity< {{class_name}}> +{ + template< typename FieldPtrOrIterator > + static void get( Vector3< real_t > & momentumDensity, const {{class_name}} & lm, const FieldPtrOrIterator & it ) + { + const auto x = it.x(); + const auto y = it.y(); + const auto z = it.z(); + + {% for i in range(Q) -%} + const real_t f_{{i}} = it[{{i}}]; + {% endfor -%} + + {{momentum_density_getter | indent(8) }} + {% for i in range(D) -%} + momentumDensity[{{i}}] = md_{{i}}; + {% endfor %} + } + + template< typename PdfField_T > + static void get( Vector3< real_t > & momentumDensity, const {{class_name}} & lm, const PdfField_T & pdf, + const cell_idx_t x, const cell_idx_t y, const cell_idx_t z ) + { + const real_t & xyz0 = pdf(x,y,z,0); + {% for i in range(Q) -%} + const real_t f_{{i}} = pdf.getF( &xyz0, {{i}}); + {% endfor -%} + + {{momentum_density_getter | indent(8) }} + {% for i in range(D) -%} + momentumDensity[{{i}}] = md_{{i}}; + {% endfor %} + } +}; + + +template<> +struct PressureTensor<{{class_name}}> +{ + template< typename FieldPtrOrIterator > + static void get( Matrix3< real_t > & /* pressureTensor */, const {{class_name}} & /* latticeModel */, const FieldPtrOrIterator & /* it */ ) + { + WALBERLA_ABORT("Not implemented"); + } + + template< typename PdfField_T > + static void get( Matrix3< real_t > & /* pressureTensor */, const {{class_name}} & /* latticeModel */, const PdfField_T & /* pdf */, + const cell_idx_t /* x */, const cell_idx_t /* y */, const cell_idx_t /* z */ ) + { + WALBERLA_ABORT("Not implemented"); + } +}; + + +template<> +struct ShearRate<{{class_name}}> +{ + template< typename FieldPtrOrIterator > + static inline real_t get( const {{class_name}} & /* latticeModel */, const FieldPtrOrIterator & /* it */, + const Vector3< real_t > & /* velocity */, const real_t /* rho */) + { + WALBERLA_ABORT("Not implemented"); + return real_t(0.0); + } + + template< typename PdfField_T > + static inline real_t get( const {{class_name}} & latticeModel, + const PdfField_T & /* pdf */, const cell_idx_t /* x */, const cell_idx_t /* y */, const cell_idx_t /* z */, + const Vector3< real_t > & /* velocity */, const real_t /* rho */ ) + { + WALBERLA_ABORT("Not implemented"); + return real_t(0.0); + } + + static inline real_t get( const std::vector< real_t > & /* nonEquilibrium */, const real_t /* relaxationParam */, + const real_t /* rho */ = real_t(1) ) + { + WALBERLA_ABORT("Not implemented"); + return real_t(0.0); + } +}; + + +} // namespace {{namespace}} +} // namespace walberla + + + +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#ifdef WALBERLA_CXX_COMPILER_IS_CLANG +#pragma clang diagnostic pop +#endif diff --git a/python/lbmpy_walberla/tests/__init__.py b/python/lbmpy_walberla/tests/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/python/lbmpy_walberla/tests/test_walberla_codegen.py b/python/lbmpy_walberla/tests/test_walberla_codegen.py new file mode 100644 index 0000000000000000000000000000000000000000..60b86fa57809035c205bc22ab726a5bd72f8aec3 --- /dev/null +++ b/python/lbmpy_walberla/tests/test_walberla_codegen.py @@ -0,0 +1,107 @@ +import unittest + +import sympy as sp + +import pystencils as ps +from lbmpy.boundaries import UBB, NoSlip +from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule +from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lattice_model +from lbmpy_walberla.sparse import ListLbGenerator +from pystencils_walberla import generate_pack_info_for_field, generate_pack_info_from_kernel +from pystencils_walberla.cmake_integration import ManualCodeGenerationContext + + +class WalberlaLbmpyCodegenTest(unittest.TestCase): + + @staticmethod + def test_lattice_model(): + with ManualCodeGenerationContext() as ctx: + force_field = ps.fields("force(3): [3D]", layout='fzyx') + omega = sp.Symbol("omega") + + cr = create_lb_collision_rule(stencil='D3Q19', method='srt', relaxation_rates=[omega], compressible=True, + force_model='guo', force=force_field.center_vector) + + scaling = RefinementScaling() + scaling.add_standard_relaxation_rate_scaling(omega) + scaling.add_force_scaling(force_field) + + generate_lattice_model(ctx, 'SrtWithForceFieldModel', cr, refinement_scaling=scaling) + generate_boundary(ctx, 'MyUBB', UBB([0.05, 0, 0]), cr.method) + generate_boundary(ctx, 'MyNoSlip', NoSlip(), cr.method) + assert 'static const bool compressible = true;' in ctx.files['SrtWithForceFieldModel.h'] + + @staticmethod + def test_sparse(): + from lbmpy.creationfunctions import create_lb_collision_rule + from pystencils import get_code_str + g = ListLbGenerator(create_lb_collision_rule()) + kernel_code = get_code_str(g.kernel()) + assert 'num_cells' in kernel_code + setter_code = get_code_str(g.setter_ast()) + assert 'num_cells' in setter_code + getter_code = get_code_str(g.getter_ast()) + assert 'num_cells' in getter_code + + @staticmethod + def test_pack_info(): + with ManualCodeGenerationContext() as ctx: + f = ps.fields("f(9): [3D]") + generate_pack_info_for_field(ctx, 'MyPackInfo1', f) + + lb_assignments = create_lb_update_rule(stencil='D3Q19', method='srt').main_assignments + generate_pack_info_from_kernel(ctx, 'MyPackInfo2', lb_assignments) + + @staticmethod + def test_incompressible(): + with ManualCodeGenerationContext() as ctx: + omega = sp.Symbol("omega") + + cr = create_lb_collision_rule(stencil='D3Q19', method='srt', relaxation_rates=[omega], compressible=False) + generate_lattice_model(ctx, 'Model', cr) + assert 'static const bool compressible = false;' in ctx.files['Model.h'] + + @staticmethod + def test_output_field(): + with ManualCodeGenerationContext(openmp=True, double_accuracy=True) as ctx: + omega_field = ps.fields("omega_out: [3D]", layout='fzyx') + parameters = { + 'stencil': 'D3Q27', + 'method': 'trt-kbc-n1', + 'compressible': True, + 'entropic': True, + 'omega_output_field': omega_field, + } + cr = create_lb_collision_rule(**parameters) + generate_lattice_model(ctx, 'Model', cr) + + @staticmethod + def test_fluctuating(): + with ManualCodeGenerationContext(openmp=True, double_accuracy=True) as ctx: + omega_shear = sp.symbols("omega") + force_field, vel_field = ps.fields("force(3), velocity(3): [3D]", layout='fzyx') + + # the collision 
rule of the LB method where some advanced features are enabled
+            collision_rule = create_lb_collision_rule(
+                stencil='D3Q19', compressible=True, fluctuating={'seed': 0, 'temperature': 1e-6},
+                method='mrt', relaxation_rates=[omega_shear]*19,
+                force_model='guo', force=force_field.center_vector,
+                optimization={'cse_global': False}
+            )
+            generate_lattice_model(ctx, 'FluctuatingMRT', collision_rule)
+
+    @staticmethod
+    def test_boundary_3D():
+        with ManualCodeGenerationContext(openmp=True, double_accuracy=True) as ctx:
+            lb_method = create_lb_method(stencil='D3Q19', method='srt')
+            generate_boundary(ctx, 'Boundary', NoSlip(), lb_method, target='gpu')
+
+    @staticmethod
+    def test_boundary_2D():
+        with ManualCodeGenerationContext(openmp=True, double_accuracy=True) as ctx:
+            lb_method = create_lb_method(stencil='D2Q9', method='srt')
+            generate_boundary(ctx, 'Boundary', NoSlip(), lb_method, target='gpu')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py
new file mode 100644
index 0000000000000000000000000000000000000000..05e98a96fe31c1fd001c400f696b4e7c143a762c
--- /dev/null
+++ b/python/lbmpy_walberla/walberla_lbm_generation.py
@@ -0,0 +1,271 @@
+import warnings
+
+import numpy as np
+import sympy as sp
+from jinja2 import Environment, PackageLoader, StrictUndefined, Template
+from sympy.tensor import IndexedBase
+
+import pystencils as ps
+from lbmpy.creationfunctions import create_lb_update_rule, update_with_default_parameters
+from lbmpy.fieldaccess import CollideOnlyInplaceAccessor, StreamPullTwoFieldsAccessor
+from lbmpy.relaxationrates import relaxation_rate_scaling
+from lbmpy.stencils import get_stencil
+from lbmpy.updatekernels import create_lbm_kernel, create_stream_pull_only_kernel
+from pystencils import AssignmentCollection, create_kernel
+from pystencils.astnodes import SympyAssignment
+from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, get_headers
+from pystencils.data_types import TypedSymbol, type_all_numbers
+from pystencils.field import Field
+from pystencils.stencil import have_same_entries, offset_to_direction_string
+from pystencils.sympyextensions import get_symmetric_part
+from pystencils.transformations import add_types
+from pystencils_walberla.codegen import KernelInfo, default_create_kernel_parameters
+from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
+
+cpp_printer = CustomSympyPrinter()
+REFINEMENT_SCALE_FACTOR = sp.Symbol("level_scale_factor")
+
+
+def __lattice_model(generation_context, class_name, lb_method, stream_collide_ast, collide_ast, stream_ast,
+                    refinement_scaling):
+    stencil_name = get_stencil_name(lb_method.stencil)
+    if not stencil_name:
+        raise ValueError("lb_method uses a stencil that is not supported in waLBerla")
+    is_float = not generation_context.double_accuracy
+    dtype = np.float32 if is_float else np.float64
+
+    vel_symbols = lb_method.conserved_quantity_computation.first_order_moment_symbols
+    rho_sym = sp.Symbol("rho")
+    pdfs_sym = sp.symbols("f_:%d" % (len(lb_method.stencil),))
+    vel_arr_symbols = [IndexedBase(sp.Symbol('u'), shape=(1,))[i] for i in range(len(vel_symbols))]
+    momentum_density_symbols = [sp.Symbol("md_%d" % (i,)) for i in range(len(vel_symbols))]
+
+    equilibrium = lb_method.get_equilibrium()
+    equilibrium = equilibrium.new_with_substitutions({a: b for a, b in zip(vel_symbols, vel_arr_symbols)})
+    _, _, equilibrium = add_types(equilibrium.main_assignments, "float32" if
is_float else "float64", False) + equilibrium = sp.Matrix([e.rhs for e in equilibrium]) + + symmetric_equilibrium = get_symmetric_part(equilibrium, vel_arr_symbols) + asymmetric_equilibrium = sp.expand(equilibrium - symmetric_equilibrium) + + force_model = lb_method.force_model + macroscopic_velocity_shift = None + if force_model: + if hasattr(force_model, 'macroscopic_velocity_shift'): + macroscopic_velocity_shift = [expression_to_code(e, "lm.", ["rho"],dtype=dtype) + for e in force_model.macroscopic_velocity_shift(rho_sym)] + + cqc = lb_method.conserved_quantity_computation + + eq_input_from_input_eqs = cqc.equilibrium_input_equations_from_init_values(sp.Symbol("rho_in"), vel_arr_symbols) + density_velocity_setter_macroscopic_values = equations_to_code(eq_input_from_input_eqs, dtype=dtype, + variables_without_prefix=['rho_in', 'u']) + momentum_density_getter = cqc.output_equations_from_pdfs(pdfs_sym, {'density': rho_sym, + 'momentum_density': momentum_density_symbols}) + constant_suffix = "f" if is_float else "" + + required_headers = get_headers(stream_collide_ast) + + if refinement_scaling: + refinement_scaling_info = [ (e0,e1,expression_to_code(e2, '', dtype=dtype)) for e0,e1,e2 in refinement_scaling.scaling_info ] + else: + refinement_scaling_info = None + + jinja_context = { + 'class_name': class_name, + 'stencil_name': stencil_name, + 'D': lb_method.dim, + 'Q': len(lb_method.stencil), + 'compressible': lb_method.conserved_quantity_computation.compressible, + 'weights': ",".join(str(w.evalf()) + constant_suffix for w in lb_method.weights), + 'inverse_weights': ",".join(str((1/w).evalf()) + constant_suffix for w in lb_method.weights), + + 'equilibrium_from_direction': stencil_switch_statement(lb_method.stencil, equilibrium), + 'symmetric_equilibrium_from_direction': stencil_switch_statement(lb_method.stencil, symmetric_equilibrium), + 'asymmetric_equilibrium_from_direction': stencil_switch_statement(lb_method.stencil, asymmetric_equilibrium), + 'equilibrium': [cpp_printer.doprint(e) for e in equilibrium], + + 'macroscopic_velocity_shift': macroscopic_velocity_shift, + 'density_getters': equations_to_code(cqc.output_equations_from_pdfs(pdfs_sym, {"density": rho_sym}), + variables_without_prefix=[e.name for e in pdfs_sym], dtype=dtype), + 'momentum_density_getter': equations_to_code(momentum_density_getter, variables_without_prefix=pdfs_sym, + dtype=dtype), + 'density_velocity_setter_macroscopic_values': density_velocity_setter_macroscopic_values, + + 'refinement_scaling_info': refinement_scaling_info, + + 'stream_collide_kernel': KernelInfo(stream_collide_ast, ['pdfs_tmp'], [('pdfs', 'pdfs_tmp')], []), + 'collide_kernel': KernelInfo(collide_ast, [], [], []), + 'stream_kernel': KernelInfo(stream_ast, ['pdfs_tmp'], [('pdfs', 'pdfs_tmp')], []), + 'target': 'cpu', + 'namespace': 'lbm', + 'headers': required_headers, + 'need_block_offsets': ['block_offset_{}'.format(i) in [param.symbol.name for param in stream_collide_ast.get_parameters()] for i in range(3)], + } + + env = Environment(loader=PackageLoader('lbmpy_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + + header = env.get_template('LatticeModel.tmpl.h').render(**jinja_context) + source = env.get_template('LatticeModel.tmpl.cpp').render(**jinja_context) + + generation_context.write_file("{}.h".format(class_name), header) + generation_context.write_file("{}.cpp".format(class_name), source) + + +def generate_lattice_model(generation_context, class_name, collision_rule, refinement_scaling=None, + 
**create_kernel_params): + + # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial + # coordinates should be ordered in reverse direction i.e. zyx + is_float = not generation_context.double_accuracy + dtype = np.float32 if is_float else np.float64 + lb_method = collision_rule.method + + q = len(lb_method.stencil) + dim = lb_method.dim + + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) + if create_kernel_params['target'] == 'gpu': + raise ValueError("Lattice Models can only be generated for CPUs. To generate LBM on GPUs use sweeps directly") + + src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,)) + dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,)) + + stream_collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, StreamPullTwoFieldsAccessor()) + stream_collide_ast = create_kernel(stream_collide_update_rule, **create_kernel_params) + stream_collide_ast.function_name = 'kernel_streamCollide' + + collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor()) + collide_ast = create_kernel(collide_update_rule, **create_kernel_params) + collide_ast.function_name = 'kernel_collide' + + stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', 'fzyx', dtype) + stream_ast = create_kernel(stream_update_rule, **create_kernel_params) + stream_ast.function_name = 'kernel_stream' + __lattice_model(generation_context, class_name, lb_method, stream_collide_ast, collide_ast, stream_ast, + refinement_scaling) + + + + +class RefinementScaling: + level_scale_factor = sp.Symbol("level_scale_factor") + + def __init__(self): + self.scaling_info = [] + + def add_standard_relaxation_rate_scaling(self, viscosity_relaxation_rate): + self.add_scaling(viscosity_relaxation_rate, relaxation_rate_scaling) + + def add_force_scaling(self, force_parameter): + self.add_scaling(force_parameter, lambda param, factor: param * factor) + + def add_scaling(self, parameter, scaling_rule): + """ + Adds a scaling rule, how parameters on refined blocks are modified + + :param parameter: parameter to modify: may either be a Field, Field.Access or a Symbol + :param scaling_rule: function taking the parameter to be scaled as symbol and the scaling factor i.e. 
+ how much finer the current block is compared to coarsest resolution + """ + if isinstance(parameter, Field): + field = parameter + name = field.name + if field.index_dimensions > 0: + scaling_type = 'field_with_f' + field_access = field(sp.Symbol("f")) + else: + scaling_type = 'field_xyz' + field_access = field.center + expr = scaling_rule(field_access, self.level_scale_factor) + self.scaling_info.append((scaling_type, name, expr)) + elif isinstance(parameter, Field.Access): + field_access = parameter + expr = scaling_rule(field_access, self.level_scale_factor) + name = field_access.field.name + self.scaling_info.append(('field_xyz', name, expr)) + elif isinstance(parameter, sp.Symbol): + expr = scaling_rule(parameter, self.level_scale_factor) + self.scaling_info.append(('normal', parameter.name, expr)) + elif isinstance(parameter, list) or isinstance(parameter, tuple): + for p in parameter: + self.add_scaling(p, scaling_rule) + else: + raise ValueError("Invalid value for viscosity_relaxation_rate") + + +# ------------------------------------------ Internal ------------------------------------------------------------------ + + +def stencil_switch_statement(stencil, values): + template = Template(""" + using namespace stencil; + switch( direction ) { + {% for direction_name, value in dir_to_value_dict.items() -%} + case {{direction_name}}: return {{value}}; + {% endfor -%} + default: + WALBERLA_ABORT("Invalid Direction"); + } + """) + + dir_to_value_dict = {offset_to_direction_string(d): cpp_printer.doprint(v) for d, v in zip(stencil, values)} + return template.render(dir_to_value_dict=dir_to_value_dict, undefined=StrictUndefined) + + +def field_and_symbol_substitute(expr, variable_prefix="lm.", variables_without_prefix=[]): + variables_without_prefix = [v.name if isinstance(v, sp.Symbol) else v for v in variables_without_prefix] + substitutions = {} + for sym in expr.atoms(sp.Symbol): + if isinstance(sym, Field.Access): + fa = sym + prefix = "" if fa.field.name in variables_without_prefix else variable_prefix + if fa.field.index_dimensions == 0: + substitutions[fa] = sp.Symbol("%s%s->get(x,y,z)" % (prefix, fa.field.name)) + else: + assert fa.field.index_dimensions == 1, "walberla supports only 0 or 1 index dimensions" + substitutions[fa] = sp.Symbol("%s%s->get(x,y,z,%s)" % (prefix, fa.field.name, fa.index[0])) + else: + if sym.name not in variables_without_prefix: + substitutions[sym] = sp.Symbol(variable_prefix + sym.name) + return expr.subs(substitutions) + + +def expression_to_code(expr, variable_prefix="lm.", variables_without_prefix=[],dtype="double"): + """ + Takes a sympy expression and creates a C code string from it. Replaces field accesses by + walberla field accesses i.e. 
field_W^1 -> field->get(-1, 0, 0, 1) + :param expr: sympy expression + :param variable_prefix: all variables (and field) are prefixed with this string + this is used for member variables mostly + :param variables_without_prefix: this variables are not prefixed + :return: code string + """ + return cpp_printer.doprint(type_expr(field_and_symbol_substitute(expr, variable_prefix, variables_without_prefix),dtype=dtype)) + +def type_expr(eq, dtype): + eq=type_all_numbers(eq,dtype=dtype) + return eq.subs({s: TypedSymbol(s.name, dtype) for s in eq.atoms(sp.Symbol)}) + +def equations_to_code(equations, variable_prefix="lm.", variables_without_prefix=[], dtype="double"): + + if isinstance(equations, AssignmentCollection): + equations = equations.all_assignments + + variables_without_prefix = list(variables_without_prefix) + c_backend = CBackend() + result = [] + left_hand_side_names = [e.lhs.name for e in equations] + for eq in equations: + assignment = SympyAssignment(type_expr(eq.lhs,dtype=dtype), + type_expr(field_and_symbol_substitute(eq.rhs, variable_prefix, + variables_without_prefix + left_hand_side_names),dtype=dtype)) + result.append(c_backend(assignment)) + return "\n".join(result) + + +def get_stencil_name(stencil): + for name in ('D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'): + if have_same_entries(stencil, get_stencil(name, 'walberla')): + return name diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..767581a754edcf025f743c15338ef2a4b41c7624 --- /dev/null +++ b/python/pystencils_walberla/__init__.py @@ -0,0 +1,8 @@ +from .cmake_integration import CodeGeneration +from .codegen import ( + generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel, + generate_mpidtype_info_from_kernel, generate_sweep) + +__all__ = ['CodeGeneration', + 'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info', + 'generate_mpidtype_info_from_kernel'] diff --git a/python/pystencils_walberla/cmake_integration.py b/python/pystencils_walberla/cmake_integration.py new file mode 100644 index 0000000000000000000000000000000000000000..4bc5586afd5212a746bdaaa83a046bef5a24e115 --- /dev/null +++ b/python/pystencils_walberla/cmake_integration.py @@ -0,0 +1,111 @@ +""" +Mechanism for integrating code generation into walberla's CMake system. +CMake needs to determine which C++ source files are generated by which Python script. +The list of files should be available fast, without running the generation process itself. +Thus all code generation function are registered at a single class that manages this process. 
+ +Usage example: + from pystencils_walberla.cmake_integration import codegen + codegen.register(['MyClass.h', 'MyClass.cpp'], functionReturningTwoStringsForHeaderAndCpp) + +""" +import json +import os +import sys +import warnings + +__all__ = ['CodeGeneration', 'ManualCodeGenerationContext'] + + +class CodeGeneration: + def __init__(self): + expected_files, cmake_vars = parse_json_args() + self.context = CodeGenerationContext(cmake_vars) + self.expected_files = expected_files + + def __enter__(self): + return self.context + + def __exit__(self, *args): + if self.expected_files and (set(self.context.files_written) != set(self.expected_files)): + expected = set(os.path.realpath(f) for f in self.expected_files) + written = set(os.path.realpath(f) for f in self.context.files_written) + only_in_cmake = expected - written + only_generated = written - expected + error_message = "Generated files specified not correctly in cmake with 'waLBerla_python_file_generates'\n" + if only_in_cmake: + error_message += "Files only specified in CMake {}\n".format( + [os.path.basename(p) for p in only_in_cmake]) + if only_generated: + error_message += "Unexpected generated files {}\n".format([os.path.basename(p) for p in only_generated]) + raise ValueError(error_message) + + +def parse_json_args(): + default = {'EXPECTED_FILES': [], + 'CMAKE_VARS': {'WALBERLA_BUILD_WITH_OPENMP': False, + 'WALBERLA_OPTIMIZE_FOR_LOCALHOST': False, + 'WALBERLA_DOUBLE_ACCURACY': True, + 'WALBERLA_BUILD_WITH_MPI': True, + 'WALBERLA_BUILD_WITH_CUDA': False, + "CODEGEN_CFG": ""} + } + + if len(sys.argv) == 2: + try: + parsed = json.loads(sys.argv[1]) + except json.JSONDecodeError: + warnings.warn("Could not parse JSON arguments: " + sys.argv[1]) + parsed = default + else: + parsed = default + expected_files = parsed['EXPECTED_FILES'] + cmake_vars = {} + for key, value in parsed['CMAKE_VARS'].items(): + if value in ("ON", "1", "YES"): + value = True + elif value in ("OFF", "0", "NO"): + value = False + cmake_vars[key] = value + return expected_files, cmake_vars + + +class CodeGenerationContext: + def __init__(self, cmake_vars): + self.files_written = [] + self.openmp = cmake_vars['WALBERLA_BUILD_WITH_OPENMP'] + self.optimize_for_localhost = cmake_vars['WALBERLA_OPTIMIZE_FOR_LOCALHOST'] + self.mpi = cmake_vars['WALBERLA_BUILD_WITH_MPI'] + self.double_accuracy = cmake_vars['WALBERLA_DOUBLE_ACCURACY'] + self.cuda = cmake_vars['WALBERLA_BUILD_WITH_CUDA'] + self.config = cmake_vars['CODEGEN_CFG'].strip() + + def write_file(self, name, content): + self.files_written.append(os.path.abspath(name)) + with open(name, 'w') as f: + f.write(content) + + +class ManualCodeGenerationContext: + """Context for testing - does not actually write files but puts them into a public dict + Environment parameters like if OpenMP, MPI or CPU-specific optimization should be used can be explicitly passed + to constructor instead of getting them from CMake + """ + + def __init__(self, openmp=False, optimize_for_localhost=False, mpi=True, double_accuracy=True): + self.openmp = openmp + self.optimize_for_localhost = optimize_for_localhost + self.mpi = mpi + self.double_accuracy = double_accuracy + self.files = dict() + self.cuda = False + self.config = "" + + def write_file(self, name, content): + self.files[name] = content + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py new file mode 100644 index 
0000000000000000000000000000000000000000..fb665277b56b62a41063334a0cc5e2968cb1a7e4 --- /dev/null +++ b/python/pystencils_walberla/codegen.py @@ -0,0 +1,370 @@ +from collections import OrderedDict, defaultdict +from itertools import product +from typing import Dict, Optional, Sequence, Tuple + +from jinja2 import Environment, PackageLoader, StrictUndefined + +from pystencils import ( + Assignment, AssignmentCollection, Field, FieldType, create_kernel, create_staggered_kernel) +from pystencils.astnodes import KernelFunction +from pystencils.backends.cbackend import get_headers +from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets +from pystencils.stencil import inverse_direction, offset_to_direction_string +from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env + +__all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel', + 'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo'] + + +def generate_sweep(generation_context, class_name, assignments, + namespace='pystencils', field_swaps=(), staggered=False, varying_parameters=(), + inner_outer_split=False, + **create_kernel_params): + """Generates a waLBerla sweep from a pystencils representation. + + The constructor of the C++ sweep class expects all kernel parameters (fields and parameters) in alphabetical order. + Fields have to passed using BlockDataID's pointing to walberla fields + + Args: + generation_context: build system context filled with information from waLBerla's CMake. The context for example + defines where to write generated files, if OpenMP is available or which SIMD instruction + set should be used. See waLBerla examples on how to get a context. + class_name: name of the generated sweep class + assignments: list of assignments defining the stencil update rule or a :class:`KernelFunction` + namespace: the generated class is accessible as walberla::<namespace>::<class_name> + field_swaps: sequence of field pairs (field, temporary_field). The generated sweep only gets the first field + as argument, creating a temporary field internally which is swapped with the first field after + each iteration. + staggered: set to True to create staggered kernels with `pystencils.create_staggered_kernel` + varying_parameters: Depending on the configuration, the generated kernels may receive different arguments for + different setups. To not have to adapt the C++ application when then parameter change, + the varying_parameters sequence can contain parameter names, which are always expected by + the C++ class constructor even if the kernel does not need them. + inner_outer_split: if True generate a sweep that supports separate iteration over inner and outer regions + to allow for communication hiding. 
+ **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` + """ + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) + + if not generation_context.cuda and create_kernel_params['target'] == 'gpu': + return + + if isinstance(assignments, KernelFunction): + ast = assignments + create_kernel_params['target'] = ast.target + elif not staggered: + ast = create_kernel(assignments, **create_kernel_params) + else: + ast = create_staggered_kernel(assignments, **create_kernel_params) + + def to_name(f): + return f.name if isinstance(f, Field) else f + + field_swaps = tuple((to_name(e[0]), to_name(e[1])) for e in field_swaps) + temporary_fields = tuple(e[1] for e in field_swaps) + + ast.function_name = class_name.lower() + + env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + + if inner_outer_split is False: + jinja_context = { + 'kernel': KernelInfo(ast, temporary_fields, field_swaps, varying_parameters), + 'namespace': namespace, + 'class_name': class_name, + 'target': create_kernel_params.get("target", "cpu"), + 'headers': get_headers(ast), + } + header = env.get_template("Sweep.tmpl.h").render(**jinja_context) + source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context) + else: + main_kernel_info = KernelInfo(ast, temporary_fields, field_swaps, varying_parameters) + representative_field = {p.field_name for p in main_kernel_info.parameters if p.is_field_parameter} + representative_field = sorted(representative_field)[0] + + jinja_context = { + 'kernel': main_kernel_info, + 'namespace': namespace, + 'class_name': class_name, + 'target': create_kernel_params.get("target", "cpu"), + 'field': representative_field, + 'headers': get_headers(ast), + } + header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context) + source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context) + + source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu" + generation_context.write_file("{}.h".format(class_name), header) + generation_context.write_file("{}.{}".format(class_name, source_extension), source) + + +def generate_pack_info_for_field(generation_context, class_name: str, field: Field, + direction_subset: Optional[Tuple[Tuple[int, int, int]]] = None, + **create_kernel_params): + """Creates a pack info for a pystencils field assuming a pull-type stencil, packing all cell elements. + + Args: + generation_context: see documentation of `generate_sweep` + class_name: name of the generated class + field: pystencils field for which to generate pack info + direction_subset: optional sequence of directions for which values should be packed + otherwise a D3Q27 stencil is assumed + **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` + """ + if not direction_subset: + direction_subset = tuple((i, j, k) for i, j, k in product(*[(-1, 0, 1)] * 3)) + + all_index_accesses = [field(*ind) for ind in product(*[range(s) for s in field.index_shape])] + return generate_pack_info(generation_context, class_name, {direction_subset: all_index_accesses}, + **create_kernel_params) + + +def generate_pack_info_from_kernel(generation_context, class_name: str, assignments: Sequence[Assignment], + kind='pull', **create_kernel_params): + """Generates a waLBerla GPU PackInfo from a (pull) kernel. 
+ + Args: + generation_context: see documentation of `generate_sweep` + class_name: name of the generated class + assignments: list of assignments from the compute kernel - generates PackInfo for "pull" part only + i.e. the kernel is expected to only write to the center + kind: + **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` + """ + assert kind in ('push', 'pull') + reads = set() + writes = set() + + if isinstance(assignments, AssignmentCollection): + assignments = assignments.all_assignments + + for a in assignments: + if not isinstance(a, Assignment): + continue + reads.update(a.rhs.atoms(Field.Access)) + writes.update(a.lhs.atoms(Field.Access)) + spec = defaultdict(set) + if kind == 'pull': + for fa in reads: + assert all(abs(e) <= 1 for e in fa.offsets) + if all(offset == 0 for offset in fa.offsets): + continue + comm_direction = inverse_direction(fa.offsets) + for comm_dir in comm_directions(comm_direction): + spec[(comm_dir,)].add(fa.field.center(*fa.index)) + elif kind == 'push': + for fa in writes: + assert all(abs(e) <= 1 for e in fa.offsets) + if all(offset == 0 for offset in fa.offsets): + continue + for comm_dir in comm_directions(fa.offsets): + spec[(comm_dir,)].add(fa) + else: + raise ValueError("Invalid 'kind' parameter") + return generate_pack_info(generation_context, class_name, spec, **create_kernel_params) + + +def generate_pack_info(generation_context, class_name: str, + directions_to_pack_terms: Dict[Tuple[Tuple], Sequence[Field.Access]], + namespace='pystencils', + **create_kernel_params): + """Generates a waLBerla GPU PackInfo + + Args: + generation_context: see documentation of `generate_sweep` + class_name: name of the generated class + directions_to_pack_terms: maps tuples of directions to read field accesses, specifying which values have to be + packed for which direction + namespace: inner namespace of the generated class + **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` + """ + items = [(e[0], sorted(e[1], key=lambda x: str(x))) for e in directions_to_pack_terms.items()] + items = sorted(items, key=lambda e: e[0]) + directions_to_pack_terms = OrderedDict(items) + + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) + target = create_kernel_params.get('target', 'cpu') + + template_name = "CpuPackInfo.tmpl" if target == 'cpu' else 'GpuPackInfo.tmpl' + + fields_accessed = set() + for terms in directions_to_pack_terms.values(): + for term in terms: + assert isinstance(term, Field.Access) # and all(e == 0 for e in term.offsets) + fields_accessed.add(term) + + field_names = {fa.field.name for fa in fields_accessed} + + data_types = {fa.field.dtype for fa in fields_accessed} + if len(data_types) == 0: + raise ValueError("No fields to pack!") + if len(data_types) != 1: + err_detail = "\n".join(" - {} [{}]".format(f.name, f.dtype) for f in fields_accessed) + raise NotImplementedError("Fields of different data types are used - this is not supported.\n" + err_detail) + dtype = data_types.pop() + + pack_kernels = OrderedDict() + unpack_kernels = OrderedDict() + all_accesses = set() + elements_per_cell = OrderedDict() + for direction_set, terms in directions_to_pack_terms.items(): + for d in direction_set: + if not all(abs(i) <= 1 for i in d): + raise NotImplementedError("Only first neighborhood supported") + + buffer = Field.create_generic('buffer', spatial_dimensions=1, field_type=FieldType.BUFFER, + dtype=dtype.numpy_dtype, 
index_shape=(len(terms),)) + + direction_strings = tuple(offset_to_direction_string(d) for d in direction_set) + all_accesses.update(terms) + + pack_assignments = [Assignment(buffer(i), term) for i, term in enumerate(terms)] + pack_ast = create_kernel(pack_assignments, **create_kernel_params, ghost_layers=0) + pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings)) + unpack_assignments = [Assignment(term, buffer(i)) for i, term in enumerate(terms)] + unpack_ast = create_kernel(unpack_assignments, **create_kernel_params, ghost_layers=0) + unpack_ast.function_name = 'unpack_{}'.format("_".join(direction_strings)) + + pack_kernels[direction_strings] = KernelInfo(pack_ast) + unpack_kernels[direction_strings] = KernelInfo(unpack_ast) + elements_per_cell[direction_strings] = len(terms) + + fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params) + + jinja_context = { + 'class_name': class_name, + 'pack_kernels': pack_kernels, + 'unpack_kernels': unpack_kernels, + 'fused_kernel': KernelInfo(fused_kernel), + 'elements_per_cell': elements_per_cell, + 'headers': get_headers(fused_kernel), + 'target': target, + 'dtype': dtype, + 'field_name': field_names.pop(), + 'namespace': namespace, + } + env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) + add_pystencils_filters_to_jinja_env(env) + header = env.get_template(template_name + ".h").render(**jinja_context) + source = env.get_template(template_name + ".cpp").render(**jinja_context) + + source_extension = "cpp" if target == "cpu" else "cu" + generation_context.write_file("{}.h".format(class_name), header) + generation_context.write_file("{}.{}".format(class_name, source_extension), source) + + +def generate_mpidtype_info_from_kernel(generation_context, class_name: str, + assignments: Sequence[Assignment], kind='pull', namespace='pystencils', ): + assert kind in ('push', 'pull') + reads = set() + writes = set() + + if isinstance(assignments, AssignmentCollection): + assignments = assignments.all_assignments + + for a in assignments: + if not isinstance(a, Assignment): + continue + reads.update(a.rhs.atoms(Field.Access)) + writes.update(a.lhs.atoms(Field.Access)) + + spec = defaultdict(set) + if kind == 'pull': + read_fields = set(fa.field for fa in reads) + assert len(read_fields) == 1, "Only scenarios where one fields neighbors are accessed" + field = read_fields.pop() + for fa in reads: + assert all(abs(e) <= 1 for e in fa.offsets) + if all(offset == 0 for offset in fa.offsets): + continue + comm_direction = inverse_direction(fa.offsets) + for comm_dir in comm_directions(comm_direction): + assert len(fa.index) == 1, "Supports only fields with a single index dimension" + spec[(offset_to_direction_string(comm_dir),)].add(fa.index[0]) + elif kind == 'push': + written_fields = set(fa.field for fa in writes) + assert len(written_fields) == 1, "Only scenarios where one fields neighbors are accessed" + field = written_fields.pop() + + for fa in writes: + assert all(abs(e) <= 1 for e in fa.offsets) + if all(offset == 0 for offset in fa.offsets): + continue + for comm_dir in comm_directions(fa.offsets): + assert len(fa.index) == 1, "Supports only fields with a single index dimension" + spec[(offset_to_direction_string(comm_dir),)].add(fa.index[0]) + else: + raise ValueError("Invalid 'kind' parameter") + + jinja_context = { + 'class_name': class_name, + 'namespace': namespace, + 'kind': kind, + 'field_name': field.name, + 'f_size': field.index_shape[0], + 
'spec': spec, + } + env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) + header = env.get_template("MpiDtypeInfo.tmpl.h").render(**jinja_context) + generation_context.write_file("{}.h".format(class_name), header) + + +# ---------------------------------- Internal -------------------------------------------------------------------------- + + +class KernelInfo: + def __init__(self, ast, temporary_fields=(), field_swaps=(), varying_parameters=()): + self.ast = ast + self.temporary_fields = tuple(temporary_fields) + self.field_swaps = tuple(field_swaps) + self.varying_parameters = tuple(varying_parameters) + self.parameters = ast.get_parameters() # cache parameters here + + +def default_create_kernel_parameters(generation_context, params): + default_dtype = "float64" if generation_context.double_accuracy else 'float32' + + if generation_context.optimize_for_localhost: + supported_instruction_sets = get_supported_instruction_sets() + if supported_instruction_sets: + default_vec_is = get_supported_instruction_sets()[-1] + else: # if cpuinfo package is not installed + default_vec_is = 'sse' + else: + default_vec_is = None + + params['target'] = params.get('target', 'cpu') + params['data_type'] = params.get('data_type', default_dtype) + params['cpu_openmp'] = params.get('cpu_openmp', generation_context.openmp) + params['cpu_vectorize_info'] = params.get('cpu_vectorize_info', {}) + + vec = params['cpu_vectorize_info'] + vec['instruction_set'] = vec.get('instruction_set', default_vec_is) + vec['assume_inner_stride_one'] = True + vec['assume_aligned'] = vec.get('assume_aligned', False) + vec['nontemporal'] = vec.get('nontemporal', False) + return params + + +def comm_directions(direction): + if all(e == 0 for e in direction): + yield direction + binary_numbers_list = binary_numbers(len(direction)) + for comm_direction in binary_numbers_list: + for i in range(len(direction)): + if direction[i] == 0: + comm_direction[i] = 0 + if direction[i] == -1 and comm_direction[i] == 1: + comm_direction[i] = -1 + if not all(e == 0 for e in comm_direction): + yield tuple(comm_direction) + + +def binary_numbers(n): + result = list() + for i in range(1 << n): + binary_number = bin(i)[2:] + binary_number = '0' * (n - len(binary_number)) + binary_number + result.append((list(map(int, binary_number)))) + return result diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py new file mode 100644 index 0000000000000000000000000000000000000000..174b9dfecbaf6bd0c1df4d391eca5cb1a5e7c7c3 --- /dev/null +++ b/python/pystencils_walberla/jinja_filters.py @@ -0,0 +1,376 @@ +import jinja2 +import sympy as sp + +from pystencils import TypedSymbol +from pystencils.backends.cbackend import generate_c +from pystencils.backends.cuda_backend import CudaSympyPrinter +from pystencils.data_types import get_base_type +from pystencils.field import FieldType +from pystencils.kernelparameters import SHAPE_DTYPE +from pystencils.sympyextensions import prod + +temporary_fieldMemberTemplate = """ +private: std::set< {type} *, field::SwapableCompare< {type} * > > cache_{original_field_name}_;""" + +temporary_fieldTemplate = """ +// Getting temporary field {tmp_field_name} +auto it = cache_{original_field_name}_.find( {original_field_name} ); +if( it != cache_{original_field_name}_.end() ) +{{ + {tmp_field_name} = *it; +}} +else +{{ + {tmp_field_name} = {original_field_name}->cloneUninitialized(); + cache_{original_field_name}_.insert({tmp_field_name}); +}} +""" + 
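# Illustrative usage sketch for the generation API defined above (hypothetical class names and update rule,
# not part of the patched sources): a typical code-generation script builds a pystencils update rule, opens a
# CodeGeneration context (which picks up OpenMP/MPI/precision settings from CMake and falls back to the
# defaults in parse_json_args when run standalone) and registers the generated classes. The resulting
# JacobiSweep.{h,cpp} and JacobiPackInfo.{h,cpp} would then have to be listed in CMake via
# waLBerla_python_file_generates.
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel

# the temporary field of a field_swaps pair must be named '<field>_tmp' (see field_extraction_code below)
src, dst = ps.fields("f, f_tmp: double[2D]", layout='fzyx')
update = [ps.Assignment(dst.center, (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)]

with CodeGeneration() as ctx:
    generate_sweep(ctx, 'JacobiSweep', update, field_swaps=[(src, dst)])
    generate_pack_info_from_kernel(ctx, 'JacobiPackInfo', update)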
+temporary_constructor = """ +~{class_name}() {{ {contents} }} +""" + +delete_loop = """ + for(auto p: cache_{original_field_name}_) {{ + delete p; + }} +""" + + +def make_field_type(dtype, f_size, is_gpu): + if is_gpu: + return "cuda::GPUField<%s>" % (dtype,) + else: + return "GhostLayerField<%s, %d>" % (dtype, f_size) + + +def get_field_fsize(field): + """Determines the size of the index coordinate. Since walberla fields only support one index dimension, + pystencils fields with multiple index dimensions are linearized to a single index dimension. + """ + assert field.has_fixed_index_shape, \ + "All Fields have to be created with fixed index coordinate shape using index_shape=(q,) " + str(field.name) + + if field.index_dimensions == 0: + return 1 + else: + return prod(field.index_shape) + + +def get_field_stride(param): + field = param.fields[0] + type_str = get_base_type(param.symbol.dtype).base_name + stride_names = ['xStride()', 'yStride()', 'zStride()', 'fStride()'] + stride_names = ["%s(%s->%s)" % (type_str, param.field_name, e) for e in stride_names] + strides = stride_names[:field.spatial_dimensions] + if field.index_dimensions > 0: + additional_strides = [1] + for shape in reversed(field.index_shape[1:]): + additional_strides.append(additional_strides[-1] * shape) + assert len(additional_strides) == field.index_dimensions + f_stride_name = stride_names[-1] + strides.extend(["%s(%d * %s)" % (type_str, e, f_stride_name) for e in reversed(additional_strides)]) + return strides[param.symbol.coordinate] + + +def generate_declaration(kernel_info, target='cpu'): + """Generates the declaration of the kernel function""" + ast = kernel_info.ast + result = generate_c(ast, signature_only=True, dialect='cuda' if target == 'gpu' else 'c') + ";" + result = "namespace internal_%s {\n%s\n}" % (ast.function_name, result,) + return result + + +def generate_definition(kernel_info, target='cpu'): + """Generates the definition (i.e. implementation) of the kernel function""" + ast = kernel_info.ast + result = generate_c(ast, dialect='cuda' if target == 'gpu' else 'c') + result = "namespace internal_%s {\nstatic %s\n}" % (ast.function_name, result) + return result + + +def field_extraction_code(field, is_temporary, declaration_only=False, + no_declaration=False, is_gpu=False): + """Returns code string for getting a field pointer. + + This can happen in two ways: either the field is extracted from a walberla block, or a temporary field to swap is + created. 
+ + Args: + field: the field for which the code should be created + is_temporary: new_filtered field from block (False) or create a temporary copy of an existing field (True) + declaration_only: only create declaration instead of the full code + no_declaration: create the extraction code, and assume that declarations are elsewhere + is_gpu: if the field is a GhostLayerField or a GpuField + """ + + # Determine size of f coordinate which is a template parameter + f_size = get_field_fsize(field) + field_name = field.name + dtype = get_base_type(field.dtype) + field_type = make_field_type(dtype, f_size, is_gpu) + + if not is_temporary: + dtype = get_base_type(field.dtype) + field_type = make_field_type(dtype, f_size, is_gpu) + if declaration_only: + return "%s * %s;" % (field_type, field_name) + else: + prefix = "" if no_declaration else "auto " + return "%s%s = block->uncheckedFastGetData< %s >(%sID);" % (prefix, field_name, field_type, field_name) + else: + assert field_name.endswith('_tmp') + original_field_name = field_name[:-len('_tmp')] + if declaration_only: + return "%s * %s;" % (field_type, field_name) + else: + declaration = "{type} * {tmp_field_name};".format(type=field_type, tmp_field_name=field_name) + tmp_field_str = temporary_fieldTemplate.format(original_field_name=original_field_name, + tmp_field_name=field_name, type=field_type) + return tmp_field_str if no_declaration else declaration + tmp_field_str + + +@jinja2.contextfilter +def generate_block_data_to_field_extraction(ctx, kernel_info, parameters_to_ignore=(), parameters=None, + declarations_only=False, no_declarations=False): + """Generates code that extracts all required fields of a kernel from a walberla block storage.""" + if parameters is not None: + assert parameters_to_ignore == () + field_parameters = [] + for param in kernel_info.parameters: + if param.is_field_pointer and param.field_name in parameters: + field_parameters.append(param.fields[0]) + else: + field_parameters = [] + for param in kernel_info.parameters: + if param.is_field_pointer and param.field_name not in parameters_to_ignore: + field_parameters.append(param.fields[0]) + + normal_fields = {f for f in field_parameters if f.name not in kernel_info.temporary_fields} + temporary_fields = {f for f in field_parameters if f.name in kernel_info.temporary_fields} + + args = { + 'declaration_only': declarations_only, + 'no_declaration': no_declarations, + 'is_gpu': ctx['target'] == 'gpu', + } + result = "\n".join(field_extraction_code(field=field, is_temporary=False, **args) for field in normal_fields) + "\n" + result += "\n".join(field_extraction_code(field=field, is_temporary=True, **args) for field in temporary_fields) + return result + + +def generate_refs_for_kernel_parameters(kernel_info, prefix, parameters_to_ignore): + symbols = {p.field_name for p in kernel_info.parameters if p.is_field_pointer} + symbols.update(p.symbol.name for p in kernel_info.parameters if not p.is_field_parameter) + symbols.difference_update(parameters_to_ignore) + return "\n".join("auto & %s = %s%s;" % (s, prefix, s) for s in symbols) + + +@jinja2.contextfilter +def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=None, stream='0', + spatial_shape_symbols=()): + """Generates the function call to a pystencils kernel + + Args: + kernel_info: + ghost_layers_to_include: if left to 0, only the inner part of the ghost layer field is looped over + a CHECK is inserted that the field has as many ghost layers as the pystencils AST + needs. 
This parameter specifies how many ghost layers the kernel should view as + "inner area". The ghost layer field has to have the required number of ghost layers + remaining. Parameter has to be left to default if cell_interval is given. + cell_interval: Defines the name (string) of a walberla CellInterval object in scope, + that defines the inner region for the kernel to loop over. Parameter has to be left to default + if ghost_layers_to_include is specified. + stream: optional name of cuda stream variable + spatial_shape_symbols: relevant only for gpu kernels - to determine CUDA block and grid sizes the iteration + region (i.e. field shape) has to be known. This can normally be inferred by the kernel + parameters - however in special cases like boundary conditions a manual specification + may be necessary. + """ + assert isinstance(ghost_layers_to_include, str) or ghost_layers_to_include >= 0 + ast = kernel_info.ast + ast_params = kernel_info.parameters + is_cpu = ctx['target'] == 'cpu' + + ghost_layers_to_include = sp.sympify(ghost_layers_to_include) + if ast.ghost_layers is None: + required_ghost_layers = 0 + else: + # ghost layer info is ((x_gl_front, x_gl_end), (y_gl_front, y_gl_end).. ) + required_ghost_layers = max(max(ast.ghost_layers)) + + kernel_call_lines = [] + + def get_start_coordinates(field_object): + if cell_interval is None: + return [-ghost_layers_to_include - required_ghost_layers] * field_object.spatial_dimensions + else: + assert ghost_layers_to_include == 0 + return [sp.Symbol("{ci}.{coord}Min()".format(coord=coord_name, ci=cell_interval)) - required_ghost_layers + for coord_name in ('x', 'y', 'z')] + + def get_end_coordinates(field_object): + if cell_interval is None: + shape_names = ['xSize()', 'ySize()', 'zSize()'][:field_object.spatial_dimensions] + offset = 2 * ghost_layers_to_include + 2 * required_ghost_layers + return ["cell_idx_c(%s->%s) + %s" % (field_object.name, e, offset) for e in shape_names] + else: + assert ghost_layers_to_include == 0 + return ["cell_idx_c({ci}.{coord}Size()) + {gl}".format(coord=coord_name, ci=cell_interval, + gl=2 * required_ghost_layers) + for coord_name in ('x', 'y', 'z')] + + for param in ast_params: + if param.is_field_parameter and FieldType.is_indexed(param.fields[0]): + continue + + if param.is_field_pointer: + field = param.fields[0] + if field.field_type == FieldType.BUFFER: + kernel_call_lines.append("%s %s = %s;" % (param.symbol.dtype, param.symbol.name, param.field_name)) + else: + coordinates = get_start_coordinates(field) + actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, ) + coord_set = set(coordinates) + coord_set = sorted(coord_set, key=lambda e: str(e)) + for c in coord_set: + kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" % + (c, actual_gls)) + while len(coordinates) < 4: + coordinates.append(0) + coordinates = tuple(coordinates) + kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" % + ((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates)) + elif param.is_field_stride: + casted_stride = get_field_stride(param) + type_str = param.symbol.dtype.base_name + kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, casted_stride)) + elif param.is_field_shape: + coord = param.symbol.coordinate + field = param.fields[0] + type_str = param.symbol.dtype.base_name + shape = "%s(%s)" % (type_str, get_end_coordinates(field)[coord]) + assert coord < 3 + max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord]) + 
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape)) + kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape)) + + call_parameters = ", ".join([p.symbol.name for p in ast_params]) + if not is_cpu: + if not spatial_shape_symbols: + spatial_shape_symbols = [p.symbol for p in ast_params if p.is_field_shape] + spatial_shape_symbols.sort(key=lambda e: e.coordinate) + else: + spatial_shape_symbols = [TypedSymbol(s, SHAPE_DTYPE) for s in spatial_shape_symbols] + + indexing_dict = ast.indexing.call_parameters(spatial_shape_symbols) + sp_printer_c = CudaSympyPrinter() + kernel_call_lines += [ + "dim3 _block(int(%s), int(%s), int(%s));" % tuple(sp_printer_c.doprint(e) for e in indexing_dict['block']), + "dim3 _grid(int(%s), int(%s), int(%s));" % tuple(sp_printer_c.doprint(e) for e in indexing_dict['grid']), + "internal_%s::%s<<<_grid, _block, 0, %s>>>(%s);" % (ast.function_name, ast.function_name, + stream, call_parameters), + ] + else: + kernel_call_lines.append("internal_%s::%s(%s);" % (ast.function_name, ast.function_name, call_parameters)) + return "\n".join(kernel_call_lines) + + +def generate_swaps(kernel_info): + """Generates code to swap main fields with temporary fields""" + swaps = "" + for src, dst in kernel_info.field_swaps: + swaps += "%s->swapDataPointers(%s);\n" % (src, dst) + return swaps + + +def generate_constructor_initializer_list(kernel_info, parameters_to_ignore=None): + if parameters_to_ignore is None: + parameters_to_ignore = [] + + parameters_to_ignore += kernel_info.temporary_fields + + parameter_initializer_list = [] + for param in kernel_info.parameters: + if param.is_field_pointer and param.field_name not in parameters_to_ignore: + parameter_initializer_list.append("%sID(%sID_)" % (param.field_name, param.field_name)) + elif not param.is_field_parameter and param.symbol.name not in parameters_to_ignore: + parameter_initializer_list.append("%s(%s_)" % (param.symbol.name, param.symbol.name)) + return ", ".join(parameter_initializer_list) + + +def generate_constructor_parameters(kernel_info, parameters_to_ignore=None): + if parameters_to_ignore is None: + parameters_to_ignore = [] + + varying_parameters = [] + if hasattr(kernel_info, 'varying_parameters'): + varying_parameters = kernel_info.varying_parameters + varying_parameter_names = tuple(e[1] for e in varying_parameters) + parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names + + parameter_list = [] + for param in kernel_info.parameters: + if param.is_field_pointer and param.field_name not in parameters_to_ignore: + parameter_list.append("BlockDataID %sID_" % (param.field_name, )) + elif not param.is_field_parameter and param.symbol.name not in parameters_to_ignore: + parameter_list.append("%s %s_" % (param.symbol.dtype, param.symbol.name,)) + varying_parameters = ["%s %s_" % e for e in varying_parameters] + return ", ".join(parameter_list + varying_parameters) + + +@jinja2.contextfilter +def generate_members(ctx, kernel_info, parameters_to_ignore=(), only_fields=False): + ast = kernel_info.ast + fields = {f.name: f for f in ast.fields_accessed} + + params_to_skip = tuple(parameters_to_ignore) + tuple(kernel_info.temporary_fields) + params_to_skip += tuple(e[1] for e in kernel_info.varying_parameters) + is_gpu = ctx['target'] == 'gpu' + + result = [] + for param in kernel_info.parameters: + if only_fields and not param.is_field_parameter: + continue + if param.is_field_pointer and param.field_name not in params_to_skip: + 
result.append("BlockDataID %sID;" % (param.field_name, )) + elif not param.is_field_parameter and param.symbol.name not in params_to_skip: + result.append("%s %s;" % (param.symbol.dtype, param.symbol.name,)) + + for field_name in kernel_info.temporary_fields: + f = fields[field_name] + if field_name in parameters_to_ignore: + continue + assert field_name.endswith('_tmp') + original_field_name = field_name[:-len('_tmp')] + f_size = get_field_fsize(f) + field_type = make_field_type(get_base_type(f.dtype), f_size, is_gpu) + result.append(temporary_fieldMemberTemplate.format(type=field_type, original_field_name=original_field_name)) + + if hasattr(kernel_info, 'varying_parameters'): + result.extend(["%s %s;" % e for e in kernel_info.varying_parameters]) + + return "\n".join(result) + + +def generate_destructor(kernel_info, class_name): + if not kernel_info.temporary_fields: + return "" + else: + contents = "" + for field_name in kernel_info.temporary_fields: + contents += delete_loop.format(original_field_name=field_name[:-len('_tmp')]) + return temporary_constructor.format(contents=contents, class_name=class_name) + + +def add_pystencils_filters_to_jinja_env(jinja_env): + jinja_env.filters['generate_definition'] = generate_definition + jinja_env.filters['generate_declaration'] = generate_declaration + jinja_env.filters['generate_members'] = generate_members + jinja_env.filters['generate_constructor_parameters'] = generate_constructor_parameters + jinja_env.filters['generate_constructor_initializer_list'] = generate_constructor_initializer_list + jinja_env.filters['generate_call'] = generate_call + jinja_env.filters['generate_block_data_to_field_extraction'] = generate_block_data_to_field_extraction + jinja_env.filters['generate_swaps'] = generate_swaps + jinja_env.filters['generate_refs_for_kernel_parameters'] = generate_refs_for_kernel_parameters + jinja_env.filters['generate_destructor'] = generate_destructor diff --git a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24fd293700989b45229d48108aea6036e2dc913a --- /dev/null +++ b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.cpp @@ -0,0 +1,105 @@ +#include "stencil/Directions.h" +#include "core/cell/CellInterval.h" +#include "core/DataTypes.h" +#include "{{class_name}}.h" + +{% for header in headers %} +#include {{header}} +{% endfor %} + +namespace walberla { +namespace {{namespace}} { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + + +{% for kernel in pack_kernels.values() %} +{{kernel|generate_definition(target)}} +{% endfor %} + +{% for kernel in unpack_kernels.values() %} +{{kernel|generate_definition(target)}} +{% endfor %} + + +void {{class_name}}::pack(Direction dir, unsigned char * byte_buffer, IBlock * block) const +{ + {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer); + + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch( dir ) + { + {%- for direction_set, kernel in pack_kernels.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + { + {{kernel|generate_call(cell_interval="ci")|indent(12)}} + break; + } + {% endfor %} + + default: + WALBERLA_ASSERT(false); + } +} + + +void {{class_name}}::unpack(Direction dir, unsigned char * byte_buffer, IBlock * block) const +{ + 
{{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer); + + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch( communciationDirection ) + { + {%- for direction_set, kernel in unpack_kernels.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + { + {{kernel|generate_call(cell_interval="ci")|indent(12)}} + break; + } + {% endfor %} + + default: + WALBERLA_ASSERT(false); + } +} + + +uint_t {{class_name}}::size(stencil::Direction dir, const IBlock * block) const +{ + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch( dir ) + { + {%- for direction_set, elements in elements_per_cell.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + elementsPerCell = {{elements}}; + break; + {% endfor %} + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof( {{dtype}} ); +} + + + +} // namespace {{namespace}} +} // namespace walberla \ No newline at end of file diff --git a/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..ab7ef56fd2b53e9b177b1222b89a087a37447569 --- /dev/null +++ b/python/pystencils_walberla/templates/CpuPackInfo.tmpl.h @@ -0,0 +1,62 @@ +#pragma once +#include "stencil/Directions.h" +#include "core/cell/CellInterval.h" +#include "core/DataTypes.h" +#include "field/GhostLayerField.h" +#include "domain_decomposition/IBlock.h" +#include "communication/UniformPackInfo.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace {{namespace}} { + + +class {{class_name}} : public ::walberla::communication::UniformPackInfo +{ +public: + {{class_name}}( {{fused_kernel|generate_constructor_parameters(parameters_to_ignore=['buffer'])}} ) + : {{ fused_kernel|generate_constructor_initializer_list(parameters_to_ignore=['buffer']) }} + {}; + virtual ~{{class_name}}() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock * receiver, stencil::Direction dir, mpi::RecvBuffer & buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock * sender, IBlock * receiver, stencil::Direction dir) { + //TODO: optimize by generating kernel for this case + mpi::SendBuffer sBuffer; + packData( sender, dir, sBuffer ); + mpi::RecvBuffer rBuffer( sBuffer ); + unpackData( receiver, stencil::inverseDir[dir], rBuffer ); + } + +private: + void packDataImpl(const IBlock * sender, stencil::Direction dir, mpi::SendBuffer & outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast<IBlock*>(sender)); + } + + void pack (stencil::Direction dir, unsigned char * buffer, IBlock * block) const; + void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block) const; + uint_t size (stencil::Direction dir, const IBlock * block) const; + + 
{{fused_kernel|generate_members(parameters_to_ignore=['buffer'])|indent(4)}} +}; + + +} // namespace {{namespace}} +} // namespace walberla diff --git a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d2624b18800f4ee4f8f6d16833cc0f327224ea02 --- /dev/null +++ b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.cpp @@ -0,0 +1,111 @@ +#include "stencil/Directions.h" +#include "core/cell/CellInterval.h" +#include "cuda/GPUField.h" +#include "core/DataTypes.h" +#include "{{class_name}}.h" + + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + + +namespace walberla { +namespace {{namespace}} { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + + +{% for kernel in pack_kernels.values() %} +{{kernel|generate_definition(target)}} +{% endfor %} + +{% for kernel in unpack_kernels.values() %} +{{kernel|generate_definition(target)}} +{% endfor %} + + + +void {{class_name}}::pack(Direction dir, unsigned char * byte_buffer, IBlock * block, cudaStream_t stream) +{ + {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer); + + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch( dir ) + { + {%- for direction_set, kernel in pack_kernels.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + { + {{kernel|generate_call(cell_interval="ci", stream="stream")|indent(12)}} + break; + } + {% endfor %} + + default: + WALBERLA_ASSERT(false); + } +} + + +void {{class_name}}::unpack(Direction dir, unsigned char * byte_buffer, IBlock * block, cudaStream_t stream) +{ + {{dtype}} * buffer = reinterpret_cast<{{dtype}}*>(byte_buffer); + + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch( communciationDirection ) + { + {%- for direction_set, kernel in unpack_kernels.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + { + {{kernel|generate_call(cell_interval="ci", stream="stream")|indent(12)}} + break; + } + {% endfor %} + + default: + WALBERLA_ASSERT(false); + } +} + + +uint_t {{class_name}}::size(stencil::Direction dir, IBlock * block) +{ + {{fused_kernel|generate_block_data_to_field_extraction(parameters_to_ignore=['buffer'])|indent(4)}} + CellInterval ci; + {{field_name}}->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch( dir ) + { + {%- for direction_set, elements in elements_per_cell.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + elementsPerCell = {{elements}}; + break; + {% endfor %} + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof( {{dtype}} ); +} + + + +} // namespace {{namespace}} +} // namespace walberla \ No newline at end of file diff --git a/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..8b70e1cb8dce1898f8a5c955c59f810bc3353aa8 --- /dev/null +++ b/python/pystencils_walberla/templates/GpuPackInfo.tmpl.h @@ -0,0 
+1,46 @@ +#pragma once +#include "stencil/Directions.h" +#include "core/cell/CellInterval.h" +#include "cuda/GPUField.h" +#include "core/DataTypes.h" +#include "domain_decomposition/IBlock.h" +#include "cuda/communication/GeneratedGPUPackInfo.h" + + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace {{namespace}} { + + +class {{class_name}} : public ::walberla::cuda::GeneratedGPUPackInfo +{ +public: + {{class_name}}( {{fused_kernel|generate_constructor_parameters(parameters_to_ignore=['buffer'])}} ) + : {{ fused_kernel|generate_constructor_initializer_list(parameters_to_ignore=['buffer']) }} + {}; + virtual ~{{class_name}}() {} + + virtual void pack (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream); + virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream); + virtual uint_t size (stencil::Direction dir, IBlock * block); + +private: + {{fused_kernel|generate_members(parameters_to_ignore=['buffer'])|indent(4)}} +}; + + +} // namespace {{namespace}} +} // namespace walberla diff --git a/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h b/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h new file mode 100644 index 0000000000000000000000000000000000000000..3f9cbb2e659f58eb0b6ae1ff7dcb0e5b1cf0a8e5 --- /dev/null +++ b/python/pystencils_walberla/templates/MpiDtypeInfo.tmpl.h @@ -0,0 +1,84 @@ +#pragma once + +#include "core/debug/Debug.h" +#include "communication/UniformMPIDatatypeInfo.h" +#include "field/communication/MPIDatatypes.h" + +#include <set> + +namespace walberla { +namespace {{namespace}} { + +class {{class_name}} : public ::walberla::communication::UniformMPIDatatypeInfo +{ +public: + using GhostLayerField_T = GhostLayerField<real_t, {{f_size}}>; + + {{class_name}}( BlockDataID {{field_name}} ) + :{{field_name}}_({{field_name}}) + {} + virtual ~{{class_name}}() {} + + virtual shared_ptr<mpi::Datatype> getSendDatatype ( IBlock * block, const stencil::Direction dir ) + { + {% if kind == 'pull' %} + return make_shared<mpi::Datatype>( field::communication::mpiDatatypeSliceBeforeGhostlayerXYZ( + *getField( block ), dir, uint_t( 1 ), getOptimizedCommunicationIndices( dir ), false ) ); + {% else %} + return make_shared<mpi::Datatype>( field::communication::mpiDatatypeGhostLayerOnlyXYZ( + *getField( block ), dir, false, getOptimizedCommunicationIndices( dir ) ) ); + {% endif %} + } + + virtual shared_ptr<mpi::Datatype> getRecvDatatype ( IBlock * block, const stencil::Direction dir ) + { + {% if kind == 'pull' %} + return make_shared<mpi::Datatype>( field::communication::mpiDatatypeGhostLayerOnlyXYZ( + *getField( block ), dir, false, getOptimizedCommunicationIndices( stencil::inverseDir[dir] ) ) ); + {% else %} + return make_shared<mpi::Datatype>( field::communication::mpiDatatypeSliceBeforeGhostlayerXYZ( + *getField( block ), dir, uint_t( 1 ), getOptimizedCommunicationIndices( stencil::inverseDir[dir] ), false ) ); + {% endif %} + } + + virtual void * getSendPointer( IBlock * block, const stencil::Direction ) { + return getField(block)->data(); + } + + virtual void * getRecvPointer( IBlock * block, const stencil::Direction ) { + return getField(block)->data(); + } + +private: + + inline static std::set< cell_idx_t > getOptimizedCommunicationIndices( 
const stencil::Direction dir ) + { + switch(dir) + { + {%- for direction_set, index_set in spec.items() %} + {%- for dir in direction_set %} + case stencil::{{dir}}: + {%- endfor %} + return {{index_set}}; + {% endfor %} + default: + WALBERLA_ASSERT(false); + return {}; + } + } + + GhostLayerField_T * getField( IBlock * block ) + { + GhostLayerField_T * const f = block->getData<GhostLayerField_T>( {{field_name}}_ ); + WALBERLA_ASSERT_NOT_NULLPTR( f ); + return f; + } + + BlockDataID {{field_name}}_; +}; + + +} // namespace {{namespace}} +} // namespace walberla + + diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8628e40a99beec142dc087831630c6cf189a78ff --- /dev/null +++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp @@ -0,0 +1,95 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file {{class_name}}.cpp +//! \\ingroup lbm +//! \\author lbmpy +//====================================================================================================================== + +#include <cmath> + +#include "core/DataTypes.h" +#include "core/Macros.h" +#include "{{class_name}}.h" +{% for header in headers %} +#include {{header}} +{% endfor %} + + +{% if target is equalto 'cpu' -%} +#define FUNC_PREFIX +{%- elif target is equalto 'gpu' -%} +#define FUNC_PREFIX __global__ +{%- endif %} + +#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG ) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wfloat-equal" +# pragma GCC diagnostic ignored "-Wshadow" +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL ) +#pragma warning push +#pragma warning( disable : 1599 ) +#endif + +using namespace std; + +namespace walberla { +namespace {{namespace}} { + + +{{kernel|generate_definition(target)}} + +void {{class_name}}::operator()( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) +{ + {{kernel|generate_block_data_to_field_extraction|indent(4)}} + {{kernel|generate_call(stream='stream')|indent(4)}} + {{kernel|generate_swaps|indent(4)}} +} + + +void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> & blocks, + const CellInterval & globalCellInterval, + cell_idx_t ghostLayers, + IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} ) +{ + CellInterval ci = globalCellInterval; + CellInterval blockBB = blocks->getBlockCellBB( *block); + blockBB.expand( ghostLayers ); + ci.intersect( blockBB ); + blocks->transformGlobalToBlockLocalCellInterval( ci, *block ); + if( ci.empty() ) + 
+        return;
+
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    {{kernel|generate_call(stream='stream', cell_interval='ci')|indent(4)}}
+    {{kernel|generate_swaps|indent(4)}}
+}
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_INTEL )
+#pragma warning pop
+#endif
diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.h b/python/pystencils_walberla/templates/Sweep.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..9de4b96c2284560b920a6c592d8b2e5749a2bbf7
--- /dev/null
+++ b/python/pystencils_walberla/templates/Sweep.tmpl.h
@@ -0,0 +1,93 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+{% if target is equalto 'cpu' -%}
+#include "field/GhostLayerField.h"
+{%- elif target is equalto 'gpu' -%}
+#include "cuda/GPUField.h"
+{%- endif %}
+#include "field/SwapableCompare.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+
+class {{class_name}}
+{
+public:
+    {{class_name}}( {{kernel|generate_constructor_parameters}})
+        : {{ kernel|generate_constructor_initializer_list }}
+    {};
+
+    {{ kernel| generate_destructor(class_name) |indent(4) }}
+
+    void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
+    void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks,
+                           const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block
+                           {%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %});
+
+
+
+    static std::function<void (IBlock*)> getSweep(const shared_ptr<{{class_name}}> & kernel) {
+        return [kernel](IBlock * b) { (*kernel)(b); };
+    }
+
+    static std::function<void (IBlock*{%if target is equalto 'gpu'%} , cudaStream_t {% endif %})>
+        getSweepOnCellInterval(const shared_ptr<{{class_name}}> & kernel,
+                               const shared_ptr<StructuredBlockStorage> & blocks,
+                               const CellInterval & globalCellInterval,
+                               cell_idx_t ghostLayers=1 )
+    {
+        return [kernel, blocks, globalCellInterval, ghostLayers] (IBlock * b{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %}) {
+            kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b{%if target is equalto 'gpu'%},stream {% endif %});
+        };
+    }
+
+    {{ kernel|generate_members|indent(4) }}
+
+};
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
diff --git a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bdff84684a765761eb59d1fa77e1f884de6ae261
--- /dev/null
+++ b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.cpp
@@ -0,0 +1,141 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.cpp
+//! \\ingroup lbm
+//! \\author lbmpy
+//======================================================================================================================
+
+#include <cmath>
+
+#include "core/DataTypes.h"
+#include "core/Macros.h"
+#include "{{class_name}}.h"
+
+
+{% if target is equalto 'cpu' -%}
+#define FUNC_PREFIX
+{%- elif target is equalto 'gpu' -%}
+#define FUNC_PREFIX __global__
+{%- endif %}
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wfloat-equal"
+#   pragma GCC diagnostic ignored "-Wshadow"
+#   pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+using namespace std;
+
+namespace walberla {
+namespace {{namespace}} {
+
+{{kernel|generate_definition(target)}}
+
+void {{class_name}}::operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    {{kernel|generate_call(stream='stream')|indent(4)}}
+    {{kernel|generate_swaps|indent(4)}}
+}
+
+
+void {{class_name}}::runOnCellInterval( const shared_ptr<StructuredBlockStorage> & blocks,
+                                        const CellInterval & globalCellInterval,
+                                        cell_idx_t ghostLayers,
+                                        IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
+{
+    CellInterval ci = globalCellInterval;
+    CellInterval blockBB = blocks->getBlockCellBB( *block);
+    blockBB.expand( ghostLayers );
+    ci.intersect( blockBB );
+    blocks->transformGlobalToBlockLocalCellInterval( ci, *block );
+    if( ci.empty() )
+        return;
+
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+    {{kernel|generate_call(stream='stream', cell_interval='ci')|indent(4)}}
+    {{kernel|generate_swaps|indent(4)}}
+}
+
+
+void {{class_name}}::inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream{% endif %} )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+
+    CellInterval inner = {{field}}->xyzSize();
+    inner.expand(Cell(-outerWidth_[0], -outerWidth_[1], -outerWidth_[2]));
+
+    {{kernel|generate_call(stream='stream', cell_interval='inner')|indent(4)}}
+}
+
+
+void {{class_name}}::outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream {% endif %} )
+{
+    {{kernel|generate_block_data_to_field_extraction|indent(4)}}
+
+    if( layers_.size() == 0 )
+    {
+        CellInterval ci;
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::T, ci, outerWidth_[2], false);
+        layers_.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::B, ci, outerWidth_[2], false);
+        layers_.push_back(ci);
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::N, ci, outerWidth_[1], false);
+        ci.expand(Cell(0, 0, -outerWidth_[2]));
+        layers_.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::S, ci, outerWidth_[1], false);
+        ci.expand(Cell(0, 0, -outerWidth_[2]));
+        layers_.push_back(ci);
+
+        {{field}}->getSliceBeforeGhostLayer(stencil::E, ci, outerWidth_[0], false);
+        ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+        layers_.push_back(ci);
+        {{field}}->getSliceBeforeGhostLayer(stencil::W, ci, outerWidth_[0], false);
+        ci.expand(Cell(0, -outerWidth_[1], -outerWidth_[2]));
+        layers_.push_back(ci);
+    }
+
+    {%if target is equalto 'gpu'%}
+    {
+        auto parallelSection_ = parallelStreams_.parallelSection( stream );
+        for( auto & ci: layers_ )
+        {
+            parallelSection_.run([&]( auto s ) {
+                {{kernel|generate_call(stream='s', cell_interval='ci')|indent(16)}}
+            });
+        }
+    }
+    {% else %}
+    for( auto & ci: layers_ )
+    {
+        {{kernel|generate_call(cell_interval='ci')|indent(8)}}
+    }
+    {% endif %}
+
+    {{kernel|generate_swaps|indent(4)}}
+}
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
diff --git a/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..466f51071dce4066365d19643ef9692f04dba78c
--- /dev/null
+++ b/python/pystencils_walberla/templates/SweepInnerOuter.tmpl.h
@@ -0,0 +1,113 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file {{class_name}}.h
+//! \\author pystencils
+//======================================================================================================================
+
+#pragma once
+#include "core/DataTypes.h"
+
+{% if target is equalto 'cpu' -%}
+#include "field/GhostLayerField.h"
+{%- elif target is equalto 'gpu' -%}
+#include "cuda/GPUField.h"
+#include "cuda/ParallelStreams.h"
+{%- endif %}
+#include "field/SwapableCompare.h"
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/StructuredBlockStorage.h"
+#include <set>
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif _MSC_VER
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wunused-parameter"
+#   pragma GCC diagnostic ignored "-Wreorder"
+#endif
+
+namespace walberla {
+namespace {{namespace}} {
+
+
+class {{class_name}}
+{
+public:
+    {{class_name}}( {{kernel|generate_constructor_parameters}}, const Cell & outerWidth=Cell(1, 1, 1))
+        : {{ kernel|generate_constructor_initializer_list }}, outerWidth_(outerWidth)
+    {};
+
+    {{ kernel| generate_destructor(class_name) |indent(4) }}
+
+
+    void operator() ( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
+
+    void runOnCellInterval(const shared_ptr<StructuredBlockStorage> & blocks,
+                           const CellInterval & globalCellInterval, cell_idx_t ghostLayers, IBlock * block
+                           {%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %});
+
+
+
+    static std::function<void (IBlock*)> getSweep(const shared_ptr<{{class_name}}> & kernel) {
+        return [kernel](IBlock * b) { (*kernel)(b); };
+    }
+
+    static std::function<void (IBlock*{%if target is equalto 'gpu'%} , cudaStream_t {% endif %})>
+        getSweepOnCellInterval(const shared_ptr<{{class_name}}> & kernel,
+                               const shared_ptr<StructuredBlockStorage> & blocks,
+                               const CellInterval & globalCellInterval,
+                               cell_idx_t ghostLayers=1 )
+    {
+        return [kernel, blocks, globalCellInterval, ghostLayers] (IBlock * b{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %}) {
+            kernel->runOnCellInterval(blocks, globalCellInterval, ghostLayers, b{%if target is equalto 'gpu'%},stream {% endif %});
+        };
+    }
+
+
+    void inner( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
+    void outer( IBlock * block{%if target is equalto 'gpu'%} , cudaStream_t stream = 0{% endif %} );
+
+    void setOuterPriority(int priority ) {
+        {%if target is equalto 'gpu'%}
+        parallelStreams_.setStreamPriority(priority);
+        {%endif%}
+    }
+    {{kernel|generate_members|indent(4)}}
+
+private:
+    {%if target is equalto 'gpu'%}
+    cuda::ParallelStreams parallelStreams_;
+    {% endif %}
+
+    Cell outerWidth_;
+    std::vector<CellInterval> layers_;
+};
+
+
+} // namespace {{namespace}}
+} // namespace walberla
+
+
+#if ( defined WALBERLA_CXX_COMPILER_IS_GNU ) || ( defined WALBERLA_CXX_COMPILER_IS_CLANG )
+#   pragma GCC diagnostic pop
+#endif
diff --git a/python/pystencils_walberla/tests/__init__.py b/python/pystencils_walberla/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/python/pystencils_walberla/tests/test_packinfo_generation.py b/python/pystencils_walberla/tests/test_packinfo_generation.py
new file mode 100644
index 0000000000000000000000000000000000000000..78ebdd92a9ed6a6569b5e18db53e55d97f54abf4
--- /dev/null
+++ b/python/pystencils_walberla/tests/test_packinfo_generation.py
@@ -0,0 +1,41 @@
+import unittest
+
+import pystencils as ps
+from pystencils_walberla import generate_pack_info_for_field, generate_pack_info_from_kernel
+from pystencils_walberla.cmake_integration import ManualCodeGenerationContext
+
+
+class PackinfoGenTest(unittest.TestCase):
+
+    @staticmethod
+    def test_packinfo_walberla_gen():
+        for openmp in (False, True):
+            for da in (False, True):
+                with ManualCodeGenerationContext(openmp=openmp, double_accuracy=da) as ctx:
+                    dtype = "float64" if ctx.double_accuracy else "float32"
+                    f, g = ps.fields("f, g(4): {}[3D]".format(dtype))
+                    generate_pack_info_for_field(ctx, 'PI1', f)
+
+                    src, dst = ps.fields("src, src_tmp: {}[2D]".format(dtype))
+                    stencil = [[0, -1, 0],
+                               [-1, 4, -1],
+                               [0, -1, 0]]
+                    assignments = [ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4)]
+                    generate_pack_info_from_kernel(ctx, 'PI2', assignments)
+
+                    expected_files = ('PI1.cpp', 'PI1.h', 'PI2.cpp', 'PI2.h')
+                    assert all(e in ctx.files for e in expected_files)
+
+                    for file_name_to_test in ('PI1.cpp', 'PI2.cpp'):
+                        file_to_test = ctx.files[file_name_to_test]
+                        if openmp:
+                            assert '#pragma omp parallel' in file_to_test
+
+                        if da:
+                            assert 'float ' not in file_to_test
+                        else:
+                            assert 'double ' not in file_to_test
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/pystencils_walberla/tests/test_walberla_gen.py b/python/pystencils_walberla/tests/test_walberla_gen.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ffe156e9c66f8c6f83aeec0382e490861855569
--- /dev/null
+++ b/python/pystencils_walberla/tests/test_walberla_gen.py
@@ -0,0 +1,56 @@
+import unittest
+
+import sympy as sp
+
+import pystencils as ps
+from pystencils_walberla import generate_sweep
+from pystencils_walberla.cmake_integration import ManualCodeGenerationContext
+
+
+class CodegenTest(unittest.TestCase):
+
+    @staticmethod
+    def test_codegen():
+        for openmp in (False, True):
+            for da in (False, True):
+                with ManualCodeGenerationContext(openmp=openmp, double_accuracy=da) as ctx:
+                    h = sp.symbols("h")
+
+                    dtype = "float64" if ctx.double_accuracy else "float32"
+
+                    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
+                    src, dst = ps.fields("src, src_tmp: {}[2D]".format(dtype))
+                    stencil = [[0, -1, 0],
+                               [-1, 4, -1],
+                               [0, -1, 0]]
+                    assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4 * h ** 2)
+                    generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])
+
+                    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
+                    src, dst = ps.fields("src, src_tmp: {}[3D]".format(dtype))
+
+                    @ps.kernel
+                    def kernel_func():
+                        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] +
+                                         src[0, 1, 0] + src[0, -1, 0] +
+                                         src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
+
+                    generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])
+
+                    expected_files = ('JacobiKernel3D.cpp', 'JacobiKernel3D.h',
+                                      'JacobiKernel2D.cpp', 'JacobiKernel2D.h')
+                    assert all(e in ctx.files for e in expected_files)
+
+                    for file_name_to_test in ('JacobiKernel3D.cpp', 'JacobiKernel2D.cpp'):
+                        file_to_test = ctx.files[file_name_to_test]
+                        if openmp:
+                            assert '#pragma omp parallel' in file_to_test
+
+                        if da:
+                            assert 'float ' not in file_to_test
+                        else:
+                            assert 'double ' not in file_to_test
+
+
+if __name__ == '__main__':
+    unittest.main()