583dcc95 · 4c2cca10 · 17c9570b · a5fb3e75 · 0b4b9a87 · 46b23235
--- a/.clang-format
+++ b/.clang-format
@@ -61,21 +61,21 @@ IncludeCategories:
    Priority:        3
  - Regex:           '^"core/'
    Priority:        4
-  - Regex:           '^"cuda/'
-    Priority:        5
  - Regex:           '^"domain_decomposition/'
-    Priority:        6
+    Priority:        5
  - Regex:           '^"executiontree/'
-    Priority:        7
+    Priority:        6
  - Regex:           '^"fft/'
-    Priority:        8
+    Priority:        7
  - Regex:           '^"field/'
-    Priority:        9
+    Priority:        8
  - Regex:           '^"gather/'
-    Priority:        10
+    Priority:        9
  - Regex:           '^"geometry/'
+    Priority:        10
+  - Regex:           '^"gpu/'
    Priority:        11
-  - Regex:           '^"gui/'
+  - Regex:           '^"gpu/'
    Priority:        12
  - Regex:           '^"lbm/'
    Priority:        13
@@ -97,16 +97,18 @@ IncludeCategories:
    Priority:        21
  - Regex:           '^"simd/'
    Priority:        22
-  - Regex:           '^"stencil/'
+  - Regex:           '^"sqlite/'
    Priority:        23
-  - Regex:           '^"timeloop/'
+  - Regex:           '^"stencil/'
    Priority:        24
-  - Regex:           '^"vtk/'
+  - Regex:           '^"timeloop/'
    Priority:        25
-  - Regex:           '^<boost/'
+  - Regex:           '^"vtk/'
    Priority:        26
-  - Regex:           '^<'
+  - Regex:           '^<boost/'
    Priority:        27
+  - Regex:           '^<'
+    Priority:        28
 IndentCaseLabels: false
 IndentPPDirectives: AfterHash
 IndentWidth: 3

--- a/.clang-tidy
+++ b/.clang-tidy
@@ -4,15 +4,30 @@ Checks:          '
 -*,

 boost-*,
+-boost-use-ranges,

 bugprone-*,
 -bugprone-branch-clone,
 -bugprone-exception-escape,
+-bugprone-easily-swappable-parameters,
+-bugprone-crtp-constructor-accessibility,
+-bugprone-implicit-widening-of-multiplication-result,
+-bugprone-macro-parentheses,
+-bugprone-narrowing-conversions,
+-bugprone-switch-missing-default-case,
+-bugprone-assignment-in-if-condition,
+-bugprone-reserved-identifier,

 misc-*,
 -misc-misplaced-const,
+-misc-const-correctness,
+-misc-unused-parameters,
 -misc-no-recursion,
 -misc-non-private-member-variables-in-classes,
+-misc-include-cleaner,
+-misc-header-include-cycle,
+-misc-use-internal-linkage,
+-misc-use-anonymous-namespace,

 modernize-*,
 -modernize-use-auto,
@@ -22,12 +37,18 @@ modernize-*,
 -modernize-use-using,
 -modernize-avoid-bind,
 -modernize-return-braced-init-list,
+-modernize-min-max-use-initializer-list,
 -modernize-use-transparent-functors,
 -modernize-redundant-void-arg,
 -modernize-use-trailing-return-type,
+-modernize-use-default-member-init,
+-modernize-use-equals-delete,
+-modernize-macro-to-enum,
 -modernize-avoid-c-arrays,
 -modernize-concat-nested-namespaces,
 -modernize-use-nodiscard,
+-modernize-type-traits,
+-modernize-make-shared,

 mpi-*,
 -mpi-type-mismatch,
@@ -37,25 +58,26 @@ openmp-*,
 -openmp-use-default-none,

 performance-*,
+-performance-enum-size,
+-performance-noexcept-swap,
+-performance-move-const-arg,
+-performance-unnecessary-value-param,
+-performance-avoid-endl,
+-performance-no-int-to-ptr,

 portability-*,

-readability-const-return-type,
 readability-container-size-empty,
 readability-delete-null-pointer,
 readability-deleted-default,
-readability-isolate-declaration,
-readability-misleading-indentation,
 readability-misplaced-array-index,
 readability-non-const-parameter,
-readability-redundant-access-specifiers,
 readability-redundant-control-flow,
 readability-redundant-declaration,
 readability-redundant-function-ptr-dereference,
 readability-redundant-preprocessor,
 readability-redundant-smartptr-get,
 readability-redundant-string-cstr,
-readability-simplify-boolean-expr,
 readability-simplify-subscript-expr,
 readability-static-accessed-through-instance,
 readability-static-definition-in-anonymous-namespace,

--- a/.editorconfig
+++ b/.editorconfig
 # See https://editorconfig.org/
-root = true # top-most .editorconfig-file
+# top-most .editorconfig-file
+root = true

 [*]
 tab_width = 3
@@ -12,4 +13,4 @@ insert_final_newline = false
 [*.py]
 tab_width = 4
 indent_size = 4
-insert_final_newline = true
+insert_final_newline = true
\ No newline at end of file
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,10 @@ qrc_*
 # macOS
 **/.DS_Store

+# JetBrains tooling (CLion indexing files, Fleet settings)
+*.uuid
+.fleet
+

 # Generated files
 *.out
@@ -29,9 +33,12 @@ qrc_*
 # Visual Studio Code
 /.vscode

+# Zed
+/.cache*
+
 # CLion
 *.idea
-
+*.clion*

 # QtCreator
 CMakeLists.txt.user.*
@@ -73,3 +80,10 @@ CMakeDefs.h
 /moduleStatistics.json
 /walberla-config.cmake
 cmake-build-*
+
+
+# Virtual environments
+.venv/
+env/
+venv/
+ENV/
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
--- a/.gitmodules
+++ b/.gitmodules
-[submodule "extern/pybind11"]
-	path = extern/pybind11
-	url = https://github.com/pybind/pybind11.git
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -31,6 +31,7 @@ Matthias Markl
 Michael Kuron
 Nils Kohl
 Paulo Carvalho
+Philipp Suffa
 Regina Ammer
 Sagar Dolas
 Sebastian Eibl

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,11 @@
  - Add support for more shapes, e.g., convex polyhedron
 - MESA_PD:
   - Add extensive application for dense particle packing generation
+- AMD - HIP support
+  - Support for the ROCm toolchain, and thus AMD HIP, as a second GPU language
+  - All CUDA-related files, namespaces, folders, etc. are renamed to gpu.
+  - Include "GPUWrapper.h" to use the generic GPU functions, e.g., cudaMalloc -> gpuMalloc
+  - Introduce WALBERLA_BUILD_WITH_HIP and WALBERLA_BUILD_WITH_GPU_SUPPORT as new CMake variables

 ### Changed
 - Update and extend phase-field LBM showcases

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt
+++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt
-waLBerla_add_executable( NAME WorkloadEvaluation FILES WorkloadEvaluation.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk )
+waLBerla_add_executable( NAME WorkloadEvaluation FILES WorkloadEvaluation.cpp DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::pe walberla::pe_coupling walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk )

-waLBerla_add_executable( NAME AMRSedimentSettling FILES AMRSedimentSettling.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk )
+waLBerla_add_executable( NAME AMRSedimentSettling FILES AMRSedimentSettling.cpp DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::pe walberla::pe_coupling walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk )

-waLBerla_add_executable( NAME AMRSettlingSphere FILES AMRSettlingSphere.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk )
+waLBerla_add_executable( NAME AMRSettlingSphere FILES AMRSettlingSphere.cpp DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::pe walberla::pe_coupling walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk )
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -5,9 +5,11 @@ add_subdirectory( DEM )
 add_subdirectory( MeshDistance )
 add_subdirectory( CouetteFlow )
 add_subdirectory( FreeSurfaceAdvection )
+add_subdirectory( FluidizedBed )
 add_subdirectory( FluidParticleCoupling )
 add_subdirectory( FluidParticleCouplingWithLoadBalancing )
 add_subdirectory( ForcesOnSphereNearPlaneInShearFlow )
+add_subdirectory(Percolation)
 add_subdirectory( GranularGas )
 add_subdirectory( IntegratorAccuracy )
 add_subdirectory( LennardJones )
@@ -22,13 +24,15 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
   add_subdirectory( FieldCommunication )

   if ( WALBERLA_BUILD_WITH_CODEGEN )
-      add_subdirectory( FlowAroundSphereCodeGen )
      add_subdirectory( UniformGridCPU )
      add_subdirectory( PhaseFieldAllenCahn )
+      add_subdirectory( NonUniformGridCPU )
+      add_subdirectory( TurbulentChannel )
   endif()

-   if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_CUDA )
+   if ( WALBERLA_BUILD_WITH_CODEGEN AND WALBERLA_BUILD_WITH_GPU_SUPPORT )
      add_subdirectory( UniformGridGPU )
+      add_subdirectory( NonUniformGridGPU )
   endif()

 endif()

--- a/apps/benchmarks/CNT/CMakeLists.txt
+++ b/apps/benchmarks/CNT/CMakeLists.txt
@@ -7,4 +7,4 @@ waLBerla_add_executable( NAME 01_cnt_film
        InitializeCNTs.cpp
        SQLProperties.cpp
        Statistics.cpp
-   DEPENDS blockforest core mesa_pd sqlite vtk )
+      DEPENDS walberla::blockforest walberla::core walberla::mesa_pd walberla::sqlite walberla::vtk )
--- a/apps/benchmarks/ComplexGeometry/CMakeLists.txt
+++ b/apps/benchmarks/ComplexGeometry/CMakeLists.txt
 if ( WALBERLA_BUILD_WITH_OPENMESH )
+   
+	waLBerla_link_geometry_to_builddir( "*.obj" )             
+	waLBerla_link_files_to_builddir( "*.conf" )  

-	waLBerla_link_files_to_builddir( "*.obj" )                 
-   waLBerla_link_files_to_builddir( "*.conf" )  
-                                  
-	waLBerla_add_executable( NAME ComplexGeometry FILES ComplexGeometry.cpp DEPENDS boundary core lbm mesh vtk )
+   waLBerla_add_executable( NAME ComplexGeometry FILES ComplexGeometry.cpp DEPENDS walberla::boundary walberla::core walberla::lbm walberla::mesh walberla::vtk )

 	##############
 	# Some tests #

--- a/apps/benchmarks/CouetteFlow/CMakeLists.txt
+++ b/apps/benchmarks/CouetteFlow/CMakeLists.txt

 waLBerla_link_files_to_builddir( "*.dat" )                 
-                                  
-waLBerla_add_executable( NAME CouetteFlow DEPENDS blockforest boundary core field lbm postprocessing stencil timeloop vtk sqlite )
+
+waLBerla_add_executable( NAME CouetteFlow DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk walberla::sqlite )

 ##############
 # Some tests #

--- a/apps/benchmarks/CouetteFlow/CouetteFlow.cpp
+++ b/apps/benchmarks/CouetteFlow/CouetteFlow.cpp
@@ -773,7 +773,7 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod

   // remaining time logger

-   const double remainingTimeLoggerFrequency = configBlock.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 );
+   const real_t remainingTimeLoggerFrequency = configBlock.getParameter< real_t >( "remainingTimeLoggerFrequency", real_c(3.0) );
   timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "Remaining time logger" );

   // logging right before the simulation starts

--- a/apps/benchmarks/DEM/CMakeLists.txt
+++ b/apps/benchmarks/DEM/CMakeLists.txt
-waLBerla_add_executable( NAME DEM FILES DEM.cpp DEPENDS blockforest core pe )
+waLBerla_add_executable( NAME DEM FILES DEM.cpp DEPENDS walberla::blockforest walberla::core walberla::pe )
--- a/apps/benchmarks/FieldCommunication/CMakeLists.txt
+++ b/apps/benchmarks/FieldCommunication/CMakeLists.txt
@@ -4,4 +4,4 @@ waLBerla_link_files_to_builddir( "*.py" )


 waLBerla_add_executable ( NAME FieldCommunication
-                          DEPENDS blockforest core domain_decomposition field postprocessing sqlite python_coupling )
+      DEPENDS walberla::blockforest walberla::core walberla::domain_decomposition walberla::field walberla::postprocessing walberla::sqlite walberla::python_coupling )
--- a/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/CMakeLists.txt
-waLBerla_link_files_to_builddir( "*.py" )
-
-waLBerla_generate_target_from_python(NAME FlowAroundSphereGenerated
-        FILE FlowAroundSphereCodeGen.py
-        OUT_FILES FlowAroundSphereCodeGen_LbSweep.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_LbSweep.h
-        FlowAroundSphereCodeGen_MacroSetter.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_MacroSetter.h
-        FlowAroundSphereCodeGen_UBB.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_UBB.h
-        FlowAroundSphereCodeGen_NoSlip.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_NoSlip.h
-        FlowAroundSphereCodeGen_Outflow.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_Outflow.h
-        FlowAroundSphereCodeGen_PackInfoEven.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_PackInfoEven.h
-        FlowAroundSphereCodeGen_PackInfoOdd.${CODEGEN_FILE_SUFFIX} FlowAroundSphereCodeGen_PackInfoOdd.h
-        FlowAroundSphereCodeGen_InfoHeader.h)
-
-if (WALBERLA_BUILD_WITH_CUDA)
-    waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
-            DEPENDS blockforest boundary core cuda domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
-else ()
-    waLBerla_add_executable( NAME FlowAroundSphereCodeGen FILE FlowAroundSphereCodeGen.cpp
-            DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk FlowAroundSphereGenerated)
-endif (WALBERLA_BUILD_WITH_CUDA)
\ No newline at end of file
--- a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.cpp
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.cpp
-//======================================================================================================================
-//
-//  This file is part of waLBerla. waLBerla is free software: you can
-//  redistribute it and/or modify it under the terms of the GNU General Public
-//  License as published by the Free Software Foundation, either version 3 of
-//  the License, or (at your option) any later version.
-//
-//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
-//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-//  for more details.
-//
-//  You should have received a copy of the GNU General Public License along
-//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
-//
-//! \file FlowAroundSphereCodeGen.cpp
-//! \author Frederik Hennig <frederik.hennig@fau.de>
-//! \author Markus Holzer <markus.holzer@fau.de>
-//
-//======================================================================================================================
-#include "blockforest/all.h"
-
-#include "core/all.h"
-
-#include "domain_decomposition/all.h"
-
-#include "field/all.h"
-
-#include "geometry/all.h"
-
-#include "lbm/inplace_streaming/TimestepTracker.h"
-#include "lbm/vtk/QCriterion.h"
-
-#include "python_coupling/CreateConfig.h"
-#include "python_coupling/PythonCallback.h"
-
-#include "timeloop/all.h"
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-#   include "cuda/AddGPUFieldToStorage.h"
-#   include "cuda/DeviceSelectMPI.h"
-#   include "cuda/HostFieldAllocator.h"
-#   include "cuda/NVTX.h"
-#   include "cuda/ParallelStreams.h"
-#   include "cuda/communication/GPUPackInfo.h"
-#   include "cuda/communication/UniformGPUScheme.h"
-#endif
-
-// CodeGen includes
-#include "FlowAroundSphereCodeGen_InfoHeader.h"
-
-namespace walberla
-{
-typedef lbm::FlowAroundSphereCodeGen_PackInfoEven PackInfoEven_T;
-typedef lbm::FlowAroundSphereCodeGen_PackInfoOdd PackInfoOdd_T;
-
-typedef walberla::uint8_t flag_t;
-typedef FlagField< flag_t > FlagField_T;
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-typedef cuda::GPUField< real_t > GPUField;
-#endif
-
-using namespace std::placeholders;
-
-auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
-   return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
-                         storage->getNumberOfZCells(*block), uint_t(1), field::fzyx,
-                         make_shared< field::AllocateAligned< real_t, 64 > >());
-};
-
-auto VelocityCallback = [](const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block,
-                           real_t inflow_velocity, const bool constant_inflow = true) {
-   if (constant_inflow)
-   {
-      Vector3< real_t > result(inflow_velocity, real_c(0.0), real_c(0.0));
-      return result;
-   }
-   else
-   {
-      Cell globalCell;
-      CellInterval domain = SbF->getDomainCellBB();
-      auto h_y          = real_c(domain.ySize());
-      auto h_z          = real_c(domain.zSize());
-      SbF->transformBlockLocalToGlobalCell(globalCell, block, pos);
-
-      auto y1 = real_c(globalCell[1] - (h_y / 2.0 - 0.5));
-      auto z1 = real_c(globalCell[2] - (h_z / 2.0 - 0.5));
-
-      real_t u = (inflow_velocity * real_c(16.0)) / (h_y * h_y * h_z * h_z) * (h_y / real_c(2.0) - y1) *
-                 (h_y / real_c(2.0) + y1) * (h_z / real_c(2.0) - z1) * (h_z / real_c(2.0) + z1);
-
-      Vector3< real_t > result(u, 0.0, 0.0);
-      return result;
-   }
-};
-
-class AlternatingBeforeFunction
-{
- public:
-   typedef std::function< void() > BeforeFunction;
-
-   AlternatingBeforeFunction(BeforeFunction evenFunc, BeforeFunction oddFunc,
-                             std::shared_ptr< lbm::TimestepTracker >& tracker)
-      : tracker_(tracker), funcs_{ evenFunc, oddFunc } {};
-
-   void operator()() { funcs_[tracker_->getCounter()](); }
-
- private:
-   std::shared_ptr< lbm::TimestepTracker > tracker_;
-   std::vector< BeforeFunction > funcs_;
-};
-
-class Filter
-{
- public:
-   explicit Filter(Vector3< uint_t > numberOfCells) : numberOfCells_(numberOfCells) {}
-
-   void operator()(const IBlock& /*block*/) {}
-
-   bool operator()(const cell_idx_t x, const cell_idx_t y, const cell_idx_t z) const
-   {
-      return x >= -1 && x <= cell_idx_t(numberOfCells_[0]) && y >= -1 && y <= cell_idx_t(numberOfCells_[1]) &&
-             z >= -1 && z <= cell_idx_t(numberOfCells_[2]);
-   }
-
- private:
-   Vector3< uint_t > numberOfCells_;
-};
-
-using FluidFilter_T = Filter;
-
-int main(int argc, char** argv)
-{
-   walberla::Environment walberlaEnv(argc, argv);
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-   cuda::selectDeviceBasedOnMpiRank();
-#endif
-
-   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
-   {
-      WALBERLA_MPI_WORLD_BARRIER()
-
-      auto config = *cfg;
-      logging::configureLogging(config);
-      auto blocks = blockforest::createUniformBlockGridFromConfig(config);
-
-      // read parameters
-      Vector3< uint_t > cellsPerBlock =
-         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
-      auto parameters = config->getOneBlock("Parameters");
-
-      const uint_t timesteps       = parameters.getParameter< uint_t >("timesteps", uint_c(10));
-      const real_t omega           = parameters.getParameter< real_t >("omega", real_c(1.9));
-      const real_t u_max           = parameters.getParameter< real_t >("u_max", real_c(0.05));
-      const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_c(1000.0));
-      const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
-      const bool constant_inflow = parameters.getParameter< bool >("constant_inflow", true);
-
-      const double remainingTimeLoggerFrequency =
-         parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
-
-      // create fields
-      BlockDataID pdfFieldID     = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs");
-      BlockDataID velFieldID     = field::addToStorage< VelocityField_T >(blocks, "velocity", real_c(0.0), field::fzyx);
-      BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_c(0.0), field::fzyx);
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-      BlockDataID pdfFieldIDGPU = cuda::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "PDFs on GPU", true);
-      BlockDataID velFieldIDGPU =
-         cuda::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldID, "velocity on GPU", true);
-      BlockDataID densityFieldIDGPU =
-         cuda::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true);
-#endif
-
-      BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
-
-      // initialise all PDFs
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-      pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldIDGPU, velFieldIDGPU);
-      for (auto& block : *blocks)
-         setterSweep(&block);
-      cuda::fieldCpy< PdfField_T, GPUField >(blocks, pdfFieldID, pdfFieldIDGPU);
-#else
-      pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldID, velFieldID);
-      for (auto& block : *blocks)
-         setterSweep(&block);
-#endif
-      // Create communication
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-      // This way of using alternating pack infos is temporary and will soon be replaced
-      // by something more straight-forward
-
-      cuda::communication::UniformGPUScheme< Stencil_T > comEven(blocks, false);
-      comEven.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldIDGPU));
-      auto evenComm = std::function< void() >([&]() { comEven.communicate(nullptr); });
-
-      cuda::communication::UniformGPUScheme< Stencil_T > comODD(blocks, false);
-      comODD.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldIDGPU));
-      auto oddComm = std::function< void() >([&]() { comODD.communicate(nullptr); });
-#else
-      blockforest::communication::UniformBufferedScheme< Stencil_T > evenComm(blocks);
-      evenComm.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldID));
-
-      blockforest::communication::UniformBufferedScheme< Stencil_T > oddComm(blocks);
-      oddComm.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldID));
-#endif
-
-      // create and initialize boundary handling
-      const FlagUID fluidFlagUID("Fluid");
-
-      auto boundariesConfig = config->getOneBlock("Boundaries");
-
-      std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
-         velocity_initialisation = std::bind(VelocityCallback, _1, _2, _3, u_max, constant_inflow);
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-      lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldIDGPU, velocity_initialisation);
-      lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldIDGPU);
-      lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldIDGPU, pdfFieldID);
-
-      lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldIDGPU, pdfFieldIDGPU, velFieldIDGPU, omega);
-#else
-      lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldID, velocity_initialisation);
-      lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldID);
-      lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldID);
-
-      lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldID, pdfFieldID, velFieldID, omega);
-#endif
-
-      geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig);
-      geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
-
-      ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB"), fluidFlagUID);
-      noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
-      outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID);
-
-      // create time loop
-      SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
-
-      // Timestep Tracking and Sweeps
-      auto tracker = make_shared< lbm::TimestepTracker >(0);
-
-      AlternatingBeforeFunction communication(evenComm, oddComm, tracker);
-
-      // add LBM sweep and communication to time loop
-      timeloop.add() << BeforeFunction(communication, "communication") << Sweep(ubb.getSweep(tracker), "ubb boundary");
-      timeloop.add() << Sweep(outflow.getSweep(tracker), "outflow boundary");
-      timeloop.add() << Sweep(noSlip.getSweep(tracker), "noSlip boundary");
-      timeloop.add() << BeforeFunction(tracker->getAdvancementFunction(), "Timestep Advancement")
-                     << Sweep(lbSweep.getSweep(tracker), "LB update rule");
-
-      // LBM stability check
-      timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
-                                       config, blocks, pdfFieldID, flagFieldId, fluidFlagUID)),
-                                    "LBM stability check");
-
-      // log remaining time
-      timeloop.addFuncAfterTimeStep(
-         timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
-         "remaining time logger");
-
-      // add VTK output to time loop
-      uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
-      if (vtkWriteFrequency > 0)
-      {
-         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
-                                                         "simulation_step", false, true, true, false, 0);
-
-#if defined(WALBERLA_BUILD_WITH_CUDA)
-         vtkOutput->addBeforeFunction([&]() {
-            cuda::fieldCpy< VelocityField_T, GPUField >(blocks, velFieldID, velFieldIDGPU);
-            cuda::fieldCpy< ScalarField_T, GPUField >(blocks, densityFieldID, densityFieldIDGPU);
-         });
-#endif
-         auto velWriter     = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity");
-         auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density");
-         FluidFilter_T filter(cellsPerBlock);
-
-         auto QCriterionWriter = make_shared< lbm::QCriterionVTKWriter< VelocityField_T, FluidFilter_T > >(
-            blocks, filter, velFieldID, "Q-Criterion");
-
-         vtkOutput->addCellDataWriter(velWriter);
-         vtkOutput->addCellDataWriter(densityWriter);
-         vtkOutput->addCellDataWriter(QCriterionWriter);
-
-         timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
-      }
-
-      WcTimer simTimer;
-
-      WALBERLA_LOG_INFO_ON_ROOT("Simulating flow around sphere:"
-                                "\n timesteps:               "
-                                << timesteps << "\n reynolds number:         " << reynolds_number
-                                << "\n relaxation rate:         " << omega << "\n maximum inflow velocity: " << u_max
-                                << "\n diameter_sphere:         " << diameter_sphere)
-
-      simTimer.start();
-      timeloop.run();
-      simTimer.end();
-      WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
-      auto time            = real_c(simTimer.last());
-      auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
-      auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
-      WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
-      WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
-   }
-
-   return EXIT_SUCCESS;
-}
-
-} // namespace walberla
-
-int main(int argc, char** argv) { walberla::main(argc, argv); }
--- a/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
+++ b/apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
-from pystencils import Target
-from pystencils.field import fields
-from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
-
-from lbmpy.advanced_streaming.utility import get_timesteps
-from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
-from lbmpy.creationfunctions import create_lb_collision_rule
-from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
-
-from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
-
-from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
-from lbmpy_walberla import generate_lb_pack_info
-from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary
-
-import sympy as sp
-
-with CodeGeneration() as ctx:
-    data_type = "float64" if ctx.double_accuracy else "float32"
-    stencil = LBStencil(Stencil.D3Q27)
-    q = stencil.Q
-    dim = stencil.D
-    streaming_pattern = 'esotwist'
-    timesteps = get_timesteps(streaming_pattern)
-
-    pdfs, velocity_field, density_field = fields(f"pdfs({q}), velocity({dim}), density(1) : {data_type}[{dim}D]",
-                                                 layout='fzyx')
-    omega = sp.Symbol("omega")
-    u_max = sp.Symbol("u_max")
-
-    output = {
-        'density': density_field,
-        'velocity': velocity_field
-    }
-
-    lbm_config = LBMConfig(stencil=stencil, method=Method.CUMULANT, compressible=True,
-                           relaxation_rate=omega, galilean_correction=True,
-                           field_name='pdfs', streaming_pattern=streaming_pattern, output=output)
-
-    lbm_optimisation = LBMOptimisation(symbolic_field=pdfs, cse_global=False, cse_pdfs=False)
-
-    collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
-    lb_method = collision_rule.method
-
-    # getter & setter
-    setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
-                                                   pdfs=pdfs, density=1.0,
-                                                   streaming_pattern=streaming_pattern,
-                                                   previous_timestep=timesteps[0])
-
-    # opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
-
-    stencil_typedefs = {'Stencil_T': stencil}
-    field_typedefs = {'PdfField_T': pdfs,
-                      'VelocityField_T': velocity_field,
-                      'ScalarField_T': density_field}
-
-    if ctx.cuda:
-        target = Target.GPU
-    else:
-        target = Target.CPU
-
-    # sweeps
-    generate_alternating_lbm_sweep(ctx, 'FlowAroundSphereCodeGen_LbSweep',
-                                   collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
-                                   target=target)
-    generate_sweep(ctx, 'FlowAroundSphereCodeGen_MacroSetter', setter_assignments, target=target)
-
-    # boundaries
-    ubb = UBB(lambda *args: None, dim=dim, data_type=data_type)
-    ubb_data_handler = UBBAdditionalDataHandler(stencil, ubb)
-    outflow = ExtrapolationOutflow(stencil[4], lb_method, streaming_pattern=streaming_pattern, data_type=data_type)
-    outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow, target=target)
-
-    generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_UBB', ubb, lb_method,
-                                      target=target, streaming_pattern=streaming_pattern,
-                                      additional_data_handler=ubb_data_handler,
-                                      layout='fzyx')
-
-    generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_NoSlip', NoSlip(), lb_method,
-                                      target=target, streaming_pattern=streaming_pattern,
-                                      layout='fzyx')
-
-    generate_alternating_lbm_boundary(ctx, 'FlowAroundSphereCodeGen_Outflow', outflow, lb_method,
-                                      target=target, streaming_pattern=streaming_pattern,
-                                      additional_data_handler=outflow_data_handler,
-                                      layout='fzyx')
-
-    # communication
-    generate_lb_pack_info(ctx, 'FlowAroundSphereCodeGen_PackInfo', stencil, pdfs,
-                          streaming_pattern=streaming_pattern, always_generate_separate_classes=True, target=target)
-
-    # Info header containing correct template definitions for stencil and field
-    generate_info_header(ctx, 'FlowAroundSphereCodeGen_InfoHeader',
-                         stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs)
No results found