diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..bfe068484e9980b93f64a94cf82a9d9ce12fdcef
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,5 @@
+[settings]
+line_length=100
+balanced_wrapping=True
+multi_line_output=4
+known_third_party=sympy
diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp b/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
index 1f24dfffe59f267ae39d80bcdaeddb71a8db73de..b6ff21d73c085938ea896d3432b0be17cf4164d7 100644
--- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
+++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.cpp
@@ -73,7 +73,7 @@ typedef FlagField< flag_t > FlagField_T;
 typedef lbm::CumulantMRTNoSlip NoSlip_T;
 
 #if defined(WALBERLA_BUILD_WITH_CUDA)
-typedef cuda::GPUField< double > GPUField;
+typedef cuda::GPUField< real_t > GPUField;
 #endif
 
 //////////////////////////////////////////
@@ -247,4 +247,4 @@ int main(int argc, char** argv)
 
 } // namespace walberla
 
-int main(int argc, char** argv) { return walberla::main(argc, argv); }
\ No newline at end of file
+int main(int argc, char** argv) { return walberla::main(argc, argv); }
diff --git a/doc/doxygen.in b/doc/doxygen.in
index a98ddcec124a1f973381240d1d5dcbaa5bf91b77..ca467e074e41bf329a27a391bf78c3d5b534f004 100644
--- a/doc/doxygen.in
+++ b/doc/doxygen.in
@@ -2093,7 +2093,7 @@ HIDE_UNDOC_RELATIONS   = YES
 # set to NO
 # The default value is: NO.
 
-HAVE_DOT               = @DOXYGEN_DOT_FOUND@
+HAVE_DOT               = NO
 
 # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
 # to run in parallel. When set to 0 doxygen will base this on the number of
diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py
index ba1d697b968805548d084dac248d480e6ab76aa3..8f751f5296d74923b58c57c35da1fd97b0df82e4 100644
--- a/python/lbmpy_walberla/boundary.py
+++ b/python/lbmpy_walberla/boundary.py
@@ -1,70 +1,28 @@
-import numpy as np
-from jinja2 import Environment, PackageLoader, StrictUndefined
-
+import pystencils_walberla.boundary
 from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel
-from pystencils import Field, FieldType
-from pystencils.boundaries.createindexlist import numpy_data_type_for_boundary_object
-from pystencils.data_types import TypedSymbol, create_type
-from pystencils_walberla.boundary import struct_from_numpy_dtype
-from pystencils_walberla.codegen import default_create_kernel_parameters, KernelInfo
-from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
-
-
-def generate_boundary(generation_context, class_name, boundary_object, lb_method, **create_kernel_params):
-    struct_name = "IndexInfo"
-    boundary_object.name = class_name
-
-    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
-    target = create_kernel_params['target']
-
-    index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, lb_method.dim)
-
-    pdf_field = Field.create_generic('pdfs', lb_method.dim,
-                                     np.float64 if generation_context.double_accuracy else np.float32,
-                                     index_dimensions=1, layout='fzyx', index_shape=[len(lb_method.stencil)])
-
-    index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
-                        shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1))
-
-    kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target,
-                                                      openmp=generation_context.openmp)
-    kernel.function_name = "boundary_" + boundary_object.name
-    kernel.assumed_inner_stride_one = False
-
-    # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
-    if lb_method.dim == 2:
-        stencil = ()
-        for d in lb_method.stencil:
-            d = d + (0,)
-            stencil = stencil + (d,)
-    else:
-        stencil = lb_method.stencil
-
-    stencil_info = [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)]
-    inv_dirs = []
-    for direction in stencil:
-        inverse_dir = tuple([-i for i in direction])
-        inv_dirs.append(stencil.index(inverse_dir))
-
-    context = {
-        'class_name': boundary_object.name,
-        'StructName': struct_name,
-        'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype),
-        'kernel': KernelInfo(kernel),
-        'stencil_info': stencil_info,
-        'inverse_directions': inv_dirs,
-        'dim': lb_method.dim,
-        'target': target,
-        'namespace': 'lbm',
-        'inner_or_boundary': boundary_object.inner_or_boundary
-    }
-
-    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
-    add_pystencils_filters_to_jinja_env(env)
 
-    header = env.get_template('Boundary.tmpl.h').render(**context)
-    source = env.get_template('Boundary.tmpl.cpp').render(**context)
 
-    source_extension = "cpp" if create_kernel_params.get("target", "cpu") == "cpu" else "cu"
-    generation_context.write_file("{}.h".format(class_name), header)
-    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
+def generate_boundary(generation_context,
+                      class_name,
+                      boundary_object,
+                      lb_method,
+                      field_name='pdfs',
+                      **create_kernel_params):
+
+    def boundary_creation_function(field, index_field, stencil, boundary_functor, target='cpu', openmp=True, **kwargs):
+        return create_lattice_boltzmann_boundary_kernel(field,
+                                                        index_field,
+                                                        lb_method,
+                                                        boundary_functor,
+                                                        target=target,
+                                                        **kwargs)
+
+    pystencils_walberla.boundary.generate_boundary(generation_context,
+                                                   class_name,
+                                                   boundary_object,
+                                                   field_name=field_name,
+                                                   neighbor_stencil=lb_method.stencil,
+                                                   index_shape=[len(lb_method.stencil)],
+                                                   kernel_creation_function=boundary_creation_function,
+                                                   namespace='lbm',
+                                                   **create_kernel_params)
diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py
index 1984fbfc52341645ad691a901ef90abdc57e5742..7c75470013a960e8181974d3e011aacd1fbf7bcd 100644
--- a/python/pystencils_walberla/boundary.py
+++ b/python/pystencils_walberla/boundary.py
@@ -1,33 +1,52 @@
+from functools import partial
+
 import numpy as np
 from jinja2 import Environment, PackageLoader, StrictUndefined
-
 from pystencils import Field, FieldType
 from pystencils.boundaries.boundaryhandling import create_boundary_kernel
 from pystencils.boundaries.createindexlist import (
     boundary_index_array_coordinate_names, direction_member_name,
     numpy_data_type_for_boundary_object)
 from pystencils.data_types import TypedSymbol, create_type
-from pystencils_walberla.codegen import KernelInfo
+from pystencils_walberla.codegen import KernelInfo, default_create_kernel_parameters
 from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
 
 
-def generate_staggered_boundary(generation_context, class_name, boundary_object,
-                                dim, neighbor_stencil, index_shape, target='cpu'):
+def generate_boundary(generation_context,
+                      class_name,
+                      boundary_object,
+                      field_name,
+                      neighbor_stencil,
+                      index_shape,
+                      field_type=FieldType.GENERIC,
+                      kernel_creation_function=None,
+                      target='cpu',
+                      namespace='pystencils',
+                      **create_kernel_params):
     struct_name = "IndexInfo"
     boundary_object.name = class_name
+    dim = len(neighbor_stencil[0])
+
+    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
+    create_kernel_params["target"] = target
+    del create_kernel_params["cpu_vectorize_info"]
 
+    if not create_kernel_params["data_type"]:
+        create_kernel_params["data_type"] = 'double' if generation_context.double_accuracy else 'float32'
     index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)
 
-    staggered_field = Field.create_generic('field', dim,
-                                           np.float64 if generation_context.double_accuracy else np.float32,
-                                           index_dimensions=len(index_shape), layout='c', index_shape=index_shape,
-                                           field_type=FieldType.STAGGERED)
+    field = Field.create_generic(field_name, dim,
+                                 np.float64 if generation_context.double_accuracy else np.float32,
+                                 index_dimensions=len(index_shape), layout='fzyx', index_shape=index_shape,
+                                 field_type=field_type)
 
     index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
                         shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1))
 
-    kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target,
-                                    openmp=generation_context.openmp)
+    if not kernel_creation_function:
+        kernel_creation_function = create_boundary_kernel
+
+    kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, **create_kernel_params)
     kernel.function_name = "boundary_" + boundary_object.name
     kernel.assumed_inner_stride_one = False
 
@@ -55,7 +74,7 @@ def generate_staggered_boundary(generation_context, class_name, boundary_object,
         'inverse_directions': inv_dirs,
         'dim': dim,
         'target': target,
-        'namespace': 'pystencils',
+        'namespace': namespace,
         'inner_or_boundary': boundary_object.inner_or_boundary
     }
 
@@ -70,63 +89,18 @@ def generate_staggered_boundary(generation_context, class_name, boundary_object,
     generation_context.write_file("{}.{}".format(class_name, source_extension), source)
 
 
-def generate_staggered_flux_boundary(generation_context, class_name, boundary_object,
-                                     dim, neighbor_stencil, index_shape, target='cpu'):
-    struct_name = "IndexInfo"
-    boundary_object.name = class_name
-
-    index_struct_dtype = numpy_data_type_for_boundary_object(boundary_object, dim)
-
-    staggered_field = Field.create_generic('flux', dim,
-                                           np.float64 if generation_context.double_accuracy else np.float32,
-                                           index_dimensions=len(index_shape), layout='c', index_shape=index_shape,
-                                           field_type=FieldType.STAGGERED_FLUX)
-
-    index_field = Field('indexVector', FieldType.INDEXED, index_struct_dtype, layout=[0],
-                        shape=(TypedSymbol("indexVectorSize", create_type(np.int64)), 1), strides=(1, 1))
-
-    kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target,
-                                    openmp=generation_context.openmp)
-    kernel.function_name = "boundary_" + boundary_object.name
-    kernel.assumed_inner_stride_one = False
-
-    # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
-    if dim == 2:
-        stencil = ()
-        for d in neighbor_stencil:
-            d = d + (0,)
-            stencil = stencil + (d,)
-    else:
-        stencil = neighbor_stencil
-
-    stencil_info = [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)]
-    inv_dirs = []
-    for direction in stencil:
-        inverse_dir = tuple([-i for i in direction])
-        inv_dirs.append(stencil.index(inverse_dir))
-
-    context = {
-        'class_name': boundary_object.name,
-        'StructName': struct_name,
-        'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype),
-        'kernel': KernelInfo(kernel),
-        'stencil_info': stencil_info,
-        'inverse_directions': inv_dirs,
-        'dim': dim,
-        'target': target,
-        'namespace': 'pystencils',
-        'inner_or_boundary': boundary_object.inner_or_boundary
-    }
+def generate_staggered_boundary(generation_context, class_name, boundary_object,
+                                dim, neighbor_stencil, index_shape, target='cpu', **kwargs):
+    assert dim == len(neighbor_stencil[0])
+    generate_boundary(generation_context, class_name, boundary_object, 'field', neighbor_stencil, index_shape,
+                      FieldType.STAGGERED, target=target, **kwargs)
 
-    env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined)
-    add_pystencils_filters_to_jinja_env(env)
 
-    header = env.get_template('Boundary.tmpl.h').render(**context)
-    source = env.get_template('Boundary.tmpl.cpp').render(**context)
-
-    source_extension = "cpp" if target == "cpu" else "cu"
-    generation_context.write_file("{}.h".format(class_name), header)
-    generation_context.write_file("{}.{}".format(class_name, source_extension), source)
+def generate_staggered_flux_boundary(generation_context, class_name, boundary_object,
+                                     dim, neighbor_stencil, index_shape, target='cpu', **kwargs):
+    assert dim == len(neighbor_stencil[0])
+    generate_boundary(generation_context, class_name, boundary_object, 'flux', neighbor_stencil, index_shape,
+                      FieldType.STAGGERED_FLUX, target=target, **kwargs)
 
 
 def struct_from_numpy_dtype(struct_name, numpy_dtype):