LbmStorageSpecification.tmpl.h

//======================================================================================================================
//
//  This file is part of waLBerla. waLBerla is free software: you can
//  redistribute it and/or modify it under the terms of the GNU General Public
//  License as published by the Free Software Foundation, either version 3 of
//  the License, or (at your option) any later version.
//
//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
//  for more details.
//
//  You should have received a copy of the GNU General Public License along
//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \\file {{class_name}}.h
//! \\author lbmpy
//======================================================================================================================

#pragma once

#include "core/DataTypes.h"
#include "core/cell/CellInterval.h"
#include "core/mpi/SendBuffer.h"
#include "core/mpi/RecvBuffer.h"

#include "domain_decomposition/IBlock.h"
#include "field/GhostLayerField.h"

#include "stencil/{{stencil_name}}.h"
#include "stencil/Directions.h"

{% if target is equalto 'cpu' -%}
#define FUNC_PREFIX
{%- elif target is equalto 'gpu' -%}
#define FUNC_PREFIX __global__
#include "gpu/GPUWrapper.h"
#include "gpu/GPUField.h"
{%- endif %}

#ifdef __GNUC__
#define RESTRICT __restrict__
#elif _MSC_VER
#define RESTRICT __restrict
#else
#define RESTRICT
#endif

#if defined WALBERLA_CXX_COMPILER_IS_GNU || defined WALBERLA_CXX_COMPILER_IS_CLANG
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

namespace walberla
{
namespace {{namespace}}{

class {{class_name}}
{
 public:
   // Used lattice stencil
   using Stencil = stencil::{{stencil_name}};
   // Lattice stencil used for the communication (should be used to define which block directions need to be communicated)
   using CommunicationStencil = stencil::{{communication_stencil_name}};
   // If false used correction: Lattice Boltzmann Model for the Incompressible Navier–Stokes Equation, He 1997
   static const bool compressible = {% if compressible %}true{% else %}false{% endif %};
   // Cut off for the lattice Boltzmann equilibrium
   static const int equilibriumAccuracyOrder = {{equilibrium_accuracy_order}};
   // If true the equilibrium is computed in regard to "delta_rho" and not the actual density "rho"
   static const bool equilibriumDeviationOnly = {% if equilibrium_deviation_only -%} true {%- else -%} false {%- endif -%};
   // If streaming pattern is inplace (esotwist, aa, ...) or not (pull, push)
   static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};
   // If true the background deviation (rho_0 = 1) is subtracted for the collision step.
   static const bool zeroCenteredPDFs = {% if zero_centered -%} true {%- else -%} false {%- endif -%};
   // Lattice weights
   static constexpr {{dtype}} w[{{stencil_size}}] = { {{weights}} };
   // Inverse lattice weights
   static constexpr {{dtype}} wInv[{{stencil_size}}] = { {{inverse_weights}} };

   // Compute kernels to pack and unpack MPI buffers
   class PackKernels {

    public:
      using PdfField_T = {{src_field | field_type(is_gpu=is_gpu)}};
      using value_type = typename PdfField_T::value_type;

      {% if nonuniform -%}
      {% if target is equalto 'cpu' -%}
      using MaskField_T = GhostLayerField< uint32_t, 1 >;
      {%- elif target is equalto 'gpu' -%}
      using MaskField_T = gpu::GPUField< uint32_t >;
      {%- endif %}
      {%- endif %}

      static const bool inplace = {% if inplace -%} true {%- else -%} false {%- endif -%};

      /**
       * Packs all pdfs from the given cell interval to the send buffer.
       * */
      void packAll(
         {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
                "unsigned char * outBuffer", kernels['packAll'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Unpacks all pdfs from the send buffer to the given cell interval.
       * */
      void unpackAll(
         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
                "unsigned char * inBuffer", kernels['unpackAll'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Copies data between two blocks on the same process.
       * All pdfs from the sending interval are copied onto the receiving interval.
       * */
      void localCopyAll(
         {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
                kernels['localCopyAll'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Packs only those populations streaming in directions aligned with the sending direction dir from the given cell interval.
       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are packed.
       * */
      void packDirection(
         {{- [ "PdfField_T * " + src_field.name, "CellInterval & ci",
                "unsigned char * outBuffer", kernels['packDirection'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Unpacks only those populations streaming in directions aligned with the sending direction dir to the given cell interval.
       * For example, in 2D, if dir == N, the pdfs streaming in directions NW, N, NE are unpacked.
       * */
      void unpackDirection(
         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
                "unsigned char * inBuffer", kernels['unpackDirection'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /** Copies data between two blocks on the same process.
        * PDFs streaming aligned with the direction dir are copied from the sending interval onto the receiving interval.
        * */
      void localCopyDirection(
         {{- [ "PdfField_T * " + src_field.name, "CellInterval & srcInterval",
                "PdfField_T * " + dst_field.name, "CellInterval & dstInterval",
                kernels['localCopyDirection'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Returns the number of bytes that will be packed from / unpacked to the cell interval
       * when using packDirection / unpackDirection
       * @param ci  The cell interval
       * @param dir The communication direction
       * @return    The required size of the buffer, in bytes
       * */
      uint_t size (CellInterval & ci, stencil::Direction dir) const {
         return ci.numCells() * sizes[dir] * sizeof(value_type);
      }

      /**
       * Returns the number of bytes that will be packed from / unpacked to the cell interval
       * when using packAll / unpackAll
       * @param ci  The cell interval
       * @return    The required size of the buffer, in bytes
       * */
      uint_t size (CellInterval & ci) const {
         return ci.numCells() * {{stencil_size}} * sizeof(value_type);
      }

      {% if nonuniform -%}

      /**
       * Unpacks and uniformly redistributes populations coming from a coarse block onto the fine grid.
       * */
      void unpackRedistribute(
         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
                "unsigned char * inBuffer", kernels['unpackRedistribute'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Partially coalesces and packs populations streaming from a fine block into a coarse block
       * */
      void packPartialCoalescence(
         {{- [ "PdfField_T * " + src_field.name, "MaskField_T * " + mask_field.name, "CellInterval & ci",
                "unsigned char * outBuffer", kernels['packPartialCoalescence'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Prepares a coarse block for coalescence by setting every population that must be coalesced from fine blocks to zero.
       * */
      void zeroCoalescenceRegion(
         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
                kernels['zeroCoalescenceRegion'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Unpacks and coalesces populations coming from a fine block onto the fine grid
       * */
      void unpackCoalescence(
         {{- [ "PdfField_T * " + dst_field.name, "CellInterval & ci",
                "unsigned char * inBuffer", kernels['unpackCoalescence'].kernel_selection_parameters,
                ["gpuStream_t stream = nullptr"] if is_gpu else []]
             | type_identifier_list -}}
      ) const;

      /**
       * Returns the number of bytes that will be unpacked to the cell interval
       * when using unpackRedistribute. This is 2^{-d} of the data that would be
       * unpacked during same-level communication.
       * @param ci  The cell interval
       * @return    The required size of the buffer, in bytes
       * */
      uint_t redistributeSize(CellInterval & ci) const {
         return size(ci) >> {{dimension}};
      }

      /**
       * Returns the number of bytes that will be packed from the cell interval
       * when using packPartialCoalescence.
       * @param ci  The cell interval
       * @param dir The communication direction
       * @return    The required size of the buffer, in bytes
       * */
      uint_t partialCoalescenceSize(CellInterval & ci, stencil::Direction dir) const {
         return size(ci, dir) >> {{dimension}};
      }

      {%- endif %}

    private:
      const uint_t sizes[{{direction_sizes|length}}] { {{ direction_sizes | join(', ') }} };
   };

};

}} //{{namespace}}/walberla