From 04d183bf2bf99e91faf555de7ba6950906824099 Mon Sep 17 00:00:00 2001
From: Dominik Thoennes <dominik.thoennes@fau.de>
Date: Wed, 10 Jun 2020 12:54:30 +0200
Subject: [PATCH] modernize cuda integration; enable cuda + codegen tests

use CUDA as a first-class language in CMake with `enable_language(CUDA)`
enable tests where CUDA and codegen are both activated
various minor fixes
---
 .gitlab-ci.yml                                | 63 +++++++++-----
 CMakeLists.txt                                | 84 +++++--------------
 apps/benchmarks/CMakeLists.txt                |  4 +
 .../FluidParticleCoupling/CMakeLists.txt      | 14 ++--
 apps/benchmarks/UniformGridGPU/CMakeLists.txt |  3 +
 .../UniformGridGPU/UniformGridGPU.cpp         |  4 +-
 .../UniformGridGPU/UniformGridGPU_AA.cpp      |  4 +-
 apps/tutorials/CMakeLists.txt                 |  6 +-
 cmake/waLBerlaFunctions.cmake                 | 14 +---
 src/cuda/communication/MemcpyPackInfo.h       |  2 +-
 tests/cuda/CMakeLists.txt                     |  2 +-
 tests/field/CMakeLists.txt                    |  7 +-
 tests/lbm/CMakeLists.txt                      |  4 +-
 13 files changed, 98 insertions(+), 113 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f0d830a8c..a97c9c31b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -152,13 +152,14 @@ intel_18_serial:
       - cd ..
    variables:
       <<: *build_serial_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -173,13 +174,14 @@ intel_18_mpionly:
       - cd ..
    variables:
       <<: *build_mpionly_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -194,13 +196,14 @@ intel_18_hybrid:
       - cd ..
    variables:
       <<: *build_hybrid_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 1
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -215,13 +218,14 @@ intel_18_serial_dbg:
       - cd ..
    variables:
       <<: *build_serial_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -236,13 +240,14 @@ intel_18_mpionly_dbg:
       - cd ..
    variables:
       <<: *build_mpionly_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -257,13 +262,14 @@ intel_18_hybrid_dbg:
       - cd ..
    variables:
       <<: *build_hybrid_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -272,12 +278,13 @@ intel_18_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:18
    variables:
       <<: *build_hybrid_dbg_sp_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
       - intel
 
@@ -432,13 +439,14 @@ gcc_7_serial:
       - cd ..
    variables:
       <<: *build_serial_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_mpionly:
@@ -452,13 +460,14 @@ gcc_7_mpionly:
       - cd ..
    variables:
       <<: *build_mpionly_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_hybrid:
@@ -472,13 +481,14 @@ gcc_7_hybrid:
       - cd ..
    variables:
       <<: *build_hybrid_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_serial_dbg:
@@ -492,13 +502,14 @@ gcc_7_serial_dbg:
       - cd ..
    variables:
       <<: *build_serial_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 1
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_mpionly_dbg:
@@ -512,13 +523,14 @@ gcc_7_mpionly_dbg:
       - cd ..
    variables:
       <<: *build_mpionly_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_hybrid_dbg:
@@ -532,13 +544,14 @@ gcc_7_hybrid_dbg:
       - cd ..
    variables:
       <<: *build_hybrid_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_7_hybrid_dbg_sp:
@@ -546,12 +559,13 @@ gcc_7_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
    variables:
       <<: *build_hybrid_dbg_sp_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 gcc_8_serial:
@@ -832,13 +846,14 @@ clang_6.0_serial:
       - cd ..
    variables:
       <<: *build_serial_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_mpionly:
@@ -852,13 +867,14 @@ clang_6.0_mpionly:
       - cd ..
    variables:
       <<: *build_mpionly_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_hybrid:
@@ -872,13 +888,14 @@ clang_6.0_hybrid:
       - cd ..
    variables:
       <<: *build_hybrid_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_serial_dbg:
@@ -892,13 +909,14 @@ clang_6.0_serial_dbg:
       - cd ..
    variables:
       <<: *build_serial_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_mpionly_dbg:
@@ -912,13 +930,14 @@ clang_6.0_mpionly_dbg:
       - cd ..
    variables:
       <<: *build_mpionly_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_hybrid_dbg:
@@ -932,13 +951,14 @@ clang_6.0_hybrid_dbg:
       - cd ..
    variables:
       <<: *build_hybrid_dbg_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
       WALBERLA_BUILD_WITH_CODEGEN: "ON"
    except:
       variables:
          - $DISABLE_PER_COMMIT_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_6.0_hybrid_dbg_sp:
@@ -946,12 +966,13 @@ clang_6.0_hybrid_dbg_sp:
    image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:6.0
    variables:
       <<: *build_hybrid_dbg_sp_variables
-      WALBERLA_BUILD_WITH_CUDA: "OFF"
+      WALBERLA_BUILD_WITH_CUDA: "ON"
       WALBERLA_ENABLE_GUI: 0
    only:
       variables:
          - $ENABLE_NIGHTLY_BUILDS
    tags:
+      - cuda
       - docker
 
 clang_7.0_serial:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 639b3c0ef..f4e53ec5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,7 @@
 ##
 ############################################################################################################################
 
-CMAKE_MINIMUM_REQUIRED (VERSION 3.1)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
 
 
 PROJECT ( walberla )
@@ -1094,66 +1094,28 @@ endif()
 ##
 ############################################################################################################################
 if ( WALBERLA_BUILD_WITH_CUDA )
+    include(CheckLanguage)
+    check_language(CUDA)
+    if( CMAKE_CUDA_COMPILER )
+      enable_language(CUDA)
+
+      # include directories and the cudart lib are needed for cpp files that use cuda headers/libs
+      include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+      find_library(CUDART_LIBRARY cudart ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+      list ( APPEND SERVICE_LIBS ${CUDART_LIBRARY} )
+
+      #TODO: check if this is really needed? is it possible that nvtx is missing?
+      find_library( NVTX_LIBRARY nvToolsExt ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} )
+      if( NVTX_LIBRARY )
+          set( WALBERLA_CUDA_NVTX_AVAILABLE 1)
+          list ( APPEND SERVICE_LIBS ${NVTX_LIBRARY} )
+      endif()
 
-    get_directory_property(COMPILE_DEFINITIONS_SAVED_STATE DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMPILE_DEFINITIONS)
-    # cleanup compile definitions for CUDA (remove generator expression and empty elements which lead to warnings)
-    set(CLEANED_COMPILE_DEFINITIONS )
-    foreach( element ${COMPILE_DEFINITIONS_SAVED_STATE})
-        if(NOT ${element} MATCHES "^\\$")
-            list(APPEND CLEANED_COMPILE_DEFINITIONS ${element})
-        endif()
-    endforeach()
-    set_directory_properties(PROPERTIES COMPILE_DEFINITIONS CLEANED_COMPILE_DEFINITIONS)
-
-    #   set ( BUILD_SHARED_LIBS                      ON )
-    set ( CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON )
-
-    if( NOT WALBERLA_CXX_COMPILER_IS_MSVC )
-        set ( CUDA_PROPAGATE_HOST_FLAGS OFF CACHE BOOL "" )
-    endif()
-
-    if ( (NOT DEFINED CUDA_HOST_COMPILER) AND (${CMAKE_C_COMPILER} MATCHES "ccache") )
-        string ( STRIP "${CMAKE_C_COMPILER_ARG1}" stripped_compiler_string )
-        find_program ( CUDA_HOST_COMPILER ${stripped_compiler_string} )
-    endif ()
-
-    find_package ( CUDA REQUIRED )
-
-    if ( CUDA_FOUND )
-        include_directories ( ${CUDA_INCLUDE_DIRS} )
-        list ( APPEND SERVICE_LIBS ${CUDA_LIBRARIES} )
-
-        list( APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
-
-        find_library( NVTX_LIBRARY nvToolsExt PATHS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64 )
-        if( NVTX_LIBRARY )
-            set( WALBERLA_CUDA_NVTX_AVAILABLE 1)
-            list ( APPEND SERVICE_LIBS ${NVTX_LIBRARY} )
-        endif()
-
-        if ( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=" AND NOT WALBERLA_CXX_COMPILER_IS_MSVC )
-            list ( APPEND CUDA_NVCC_FLAGS "-std=c++${CMAKE_CXX_STANDARD}" )
-        endif ()
-
-        if(CMAKE_BUILD_TYPE MATCHES Debug)
-            list ( APPEND CUDA_NVCC_FLAGS "-g -G" )
-        endif()
-
-        if( WALBERLA_BUILD_WITH_FASTMATH )
-           list ( APPEND CUDA_NVCC_FLAGS "-use_fast_math" )
-        endif()
-
-        # Bug with gcc5 and cuda7.5:
-        #list( APPEND CUDA_NVCC_FLAGS  "-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES  -D__STRICT_ANSI__")
-
-        # NOTICE: exisiting cuda flags are overwritten
-        #set ( CUDA_NVCC_FLAGS "--compiler-bindir=/usr/bin/g++-4.3" )
-        #set ( CUDA_NVCC_FLAGS "-arch sm_20" )
+      # CUDA_FOUND is needed for our cmake mechanism
+      set ( CUDA_FOUND TRUE )
     else()
-        set ( WALBERLA_BUILD_WITH_CUDA FALSE )
+      set ( WALBERLA_BUILD_WITH_CUDA FALSE )
     endif ( )
-
-    set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "${COMPILE_DEFINITIONS_SAVED_STATE}" )
 endif ( )
 ############################################################################################################################
 
@@ -1288,18 +1250,18 @@ if (WALBERLA_BUILD_DOC)
   ############################################################################################################################
   find_package ( Doxygen  )
   find_package ( HTMLHelp )
-  
+
   if ( HTML_HELP_COMPILER EQUAL "" )
      set ( HTML_HELP_FOUND "NO" )
   else ( )
      set ( HTML_HELP_FOUND "YES" )
   endif ( )
-  
+
   if ( DOXYGEN_FOUND )
      set ( DOXYGEN_HTML_HEADER ${walberla_SOURCE_DIR}/doc/header.html )
      set ( DOXYGEN_HTML_FOOTER ${walberla_SOURCE_DIR}/doc/footer.html )
      set ( DOXYGEN_HTML_OUTPUT "html" )
-  
+
      configure_file ( ${walberla_SOURCE_DIR}/doc/doxygen.in ${walberla_BINARY_DIR}/doc/doxygen.cfg @ONLY )
 
      add_custom_target ( doc   ${DOXYGEN_EXECUTABLE} ${walberla_BINARY_DIR}/doc/doxygen.cfg
diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 3037383db..0e4357193 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -14,5 +14,9 @@ add_subdirectory( PoiseuilleChannel )
 add_subdirectory( ProbeVsExtraMessage )
 add_subdirectory( SchaeferTurek )
 add_subdirectory( UniformGrid )
+if ( WALBERLA_BUILD_WITH_CODEGEN )
 add_subdirectory( UniformGridGenerated )
+endif()
+if ( WALBERLA_BUILD_WITH_CUDA )
 add_subdirectory( UniformGridGPU )
+endif()
\ No newline at end of file
diff --git a/apps/benchmarks/FluidParticleCoupling/CMakeLists.txt b/apps/benchmarks/FluidParticleCoupling/CMakeLists.txt
index e55a56959..a9b6e43d8 100644
--- a/apps/benchmarks/FluidParticleCoupling/CMakeLists.txt
+++ b/apps/benchmarks/FluidParticleCoupling/CMakeLists.txt
@@ -1,14 +1,14 @@
 waLBerla_link_files_to_builddir( "*.dat" )
 
-waLBerla_generate_target_from_python(NAME FluidParticleCouplingGeneratedLBM FILE GeneratedLBM.py
-      OUT_FILES GeneratedLBM.cpp GeneratedLBM.h
-      )
+if( WALBERLA_BUILD_WITH_CODEGEN )
 
-waLBerla_generate_target_from_python(NAME FluidParticleCouplingGeneratedLBMWithForce FILE GeneratedLBMWithForce.py
-      OUT_FILES GeneratedLBMWithForce.cpp GeneratedLBMWithForce.h
-      )
+    waLBerla_generate_target_from_python(NAME FluidParticleCouplingGeneratedLBM FILE GeneratedLBM.py
+            OUT_FILES GeneratedLBM.cpp GeneratedLBM.h
+            )
 
-if( WALBERLA_BUILD_WITH_CODEGEN )
+    waLBerla_generate_target_from_python(NAME FluidParticleCouplingGeneratedLBMWithForce FILE GeneratedLBMWithForce.py
+            OUT_FILES GeneratedLBMWithForce.cpp GeneratedLBMWithForce.h
+            )
 
    waLBerla_add_executable(NAME SphereWallCollision FILES SphereWallCollision.cpp
          DEPENDS blockforest boundary core domain_decomposition field lbm lbm_mesapd_coupling
diff --git a/apps/benchmarks/UniformGridGPU/CMakeLists.txt b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
index 9ab6d1e78..4a6390633 100644
--- a/apps/benchmarks/UniformGridGPU/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGPU/CMakeLists.txt
@@ -12,6 +12,7 @@ foreach (config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_
           FILE UniformGridGPU.py
           CODEGEN_CFG ${config}
           OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
+          UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
           UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
           UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
           UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
@@ -33,6 +34,8 @@ foreach (config srt trt mrt smagorinsky entropic)
           FILE UniformGridGPU_AA.py
           CODEGEN_CFG ${config}
           OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
+          UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
+          UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
           UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
           UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
           UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
index 391feddd7..deaea0815 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
@@ -165,7 +165,7 @@ int main( int argc, char **argv )
       }
 
       Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1));
-      for(int i=0; i< 3; ++i)
+      for(uint_t i=0; i< 3; ++i)
       {
           if( int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2) {
               WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock");
@@ -310,7 +310,7 @@ int main( int argc, char **argv )
 
       auto remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", -1.0 ); // in seconds
       if (remainingTimeLoggerFrequency > 0) {
-          auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * outerIterations, remainingTimeLoggerFrequency );
+          auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * uint_c( outerIterations ), remainingTimeLoggerFrequency );
           timeLoop.addFuncAfterTimeStep( logger, "remaining time logger" );
       }
 
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
index c5fb073ab..dbda68b72 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
@@ -84,7 +84,7 @@ int main( int argc, char **argv )
 
         Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1));
 
-        for(int i=0; i< 3; ++i)
+        for(uint_t i=0; i< 3; ++i)
         {
             if( int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2) {
                 WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock");
@@ -226,7 +226,7 @@ int main( int argc, char **argv )
         double  remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", -1.0 ); // in seconds
         if ( remainingTimeLoggerFrequency > 0 )
         {
-            auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * outerIterations, remainingTimeLoggerFrequency );
+            auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * uint_c(outerIterations), remainingTimeLoggerFrequency );
             timeLoop.addFuncAfterTimeStep( logger, "remaining time logger" );
         }
 
diff --git a/apps/tutorials/CMakeLists.txt b/apps/tutorials/CMakeLists.txt
index 15689217a..8a5887e69 100644
--- a/apps/tutorials/CMakeLists.txt
+++ b/apps/tutorials/CMakeLists.txt
@@ -1,6 +1,8 @@
 add_subdirectory(basics)
-add_subdirectory(cuda)
 add_subdirectory(lbm)
 add_subdirectory(mesa_pd)
 add_subdirectory(pde)
-add_subdirectory(pe)           
+add_subdirectory(pe)
+if( WALBERLA_BUILD_WITH_CUDA )
+    add_subdirectory(cuda)
+endif()
diff --git a/cmake/waLBerlaFunctions.cmake b/cmake/waLBerlaFunctions.cmake
index 1d80e1af2..033d50814 100644
--- a/cmake/waLBerlaFunctions.cmake
+++ b/cmake/waLBerlaFunctions.cmake
@@ -89,13 +89,9 @@ function ( waLBerla_add_module )
  	endforeach( )
 
     if ( hasSourceFiles )
-        if ( CUDA_FOUND )
-            cuda_add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
-        else()
-            add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
-        endif( CUDA_FOUND )
+       add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} )
  	else( )
- 	   add_custom_target( ${moduleLibraryName} SOURCES ${sourceFiles} ${otherFiles} )  # dummy IDE target
+       add_custom_target( ${moduleLibraryName} SOURCES ${sourceFiles} ${otherFiles} )  # dummy IDE target
  	endif( )
 
     waLBerla_register_dependency ( ${moduleName} ${ARG_DEPENDS} )
@@ -201,11 +197,7 @@ function ( waLBerla_add_executable )
         endif ( )
     endif()
 
-    if ( WALBERLA_BUILD_WITH_CUDA )
-        cuda_add_executable( ${ARG_NAME} ${sourceFiles} )
-    else()
-        add_executable( ${ARG_NAME} ${sourceFiles} )
-    endif()
+    add_executable( ${ARG_NAME} ${sourceFiles} )
 
     target_link_modules  ( ${ARG_NAME} ${ARG_DEPENDS}  )
     target_link_libraries( ${ARG_NAME} ${WALBERLA_LINK_LIBRARIES_KEYWORD} ${SERVICE_LIBS} )
diff --git a/src/cuda/communication/MemcpyPackInfo.h b/src/cuda/communication/MemcpyPackInfo.h
index 5041f231e..8d85b7f1a 100644
--- a/src/cuda/communication/MemcpyPackInfo.h
+++ b/src/cuda/communication/MemcpyPackInfo.h
@@ -19,7 +19,7 @@ public:
     MemcpyPackInfo( BlockDataID pdfsID_ )
         : pdfsID(pdfsID_), numberOfGhostLayers_(0), communicateAllGhostLayers_(true)
     {};
-
+    virtual ~MemcpyPackInfo() {};
 
     virtual void pack  (stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
     virtual void unpack(stencil::Direction dir, unsigned char * buffer, IBlock * block, cudaStream_t stream);
diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt
index 8f2a4a3e9..fb00acf95 100644
--- a/tests/cuda/CMakeLists.txt
+++ b/tests/cuda/CMakeLists.txt
@@ -40,7 +40,7 @@ waLBerla_compile_test( FILES CudaMPI DEPENDS blockforest timeloop gui )
 waLBerla_compile_test( FILES AlignmentTest.cpp DEPENDS blockforest timeloop )
 
 waLBerla_generate_target_from_python(NAME MicroBenchmarkGpuLbmGenerated FILE codegen/MicroBenchmarkGpuLbm.py
-      OUT_FILES MicroBenchmarkStreamKernel.cu MicroBenchmarkCopyKernel.cu)
+      OUT_FILES MicroBenchmarkStreamKernel.cu MicroBenchmarkCopyKernel.cu MicroBenchmarkStreamKernel.h MicroBenchmarkCopyKernel.h)
 waLBerla_compile_test( FILES codegen/MicroBenchmarkGpuLbm.cpp DEPENDS MicroBenchmarkGpuLbmGenerated)
 
 endif()
\ No newline at end of file
diff --git a/tests/field/CMakeLists.txt b/tests/field/CMakeLists.txt
index c00915370..031496b99 100644
--- a/tests/field/CMakeLists.txt
+++ b/tests/field/CMakeLists.txt
@@ -17,7 +17,7 @@ waLBerla_compile_test( FILES distributors/DistributionTest.cpp)
 waLBerla_execute_test( NAME DistributionTest )
 
 waLBerla_compile_test( FILES FieldTest.cpp )
-waLBerla_execute_test( NAME FieldTest ) 
+waLBerla_execute_test( NAME FieldTest )
 
 waLBerla_compile_test( FILES FieldOfCustomTypesTest.cpp  )
 waLBerla_execute_test( NAME FieldOfCustomTypesTest )
@@ -62,7 +62,7 @@ endif( WALBERLA_BUILD_WITH_MPI )
 
 
 # CodeGen Tests
-
+if( WALBERLA_BUILD_WITH_CODEGEN )
 waLBerla_generate_target_from_python(NAME CodegenJacobiCPUGeneratedJacobiKernel FILE codegen/JacobiKernel.py
       OUT_FILES JacobiKernel2D.cpp JacobiKernel2D.h JacobiKernel3D.cpp JacobiKernel3D.h )
 waLBerla_compile_test( FILES codegen/CodegenJacobiCPU.cpp DEPENDS gui timeloop CodegenJacobiCPUGeneratedJacobiKernel)
@@ -72,5 +72,4 @@ waLBerla_generate_target_from_python(NAME CodegenPoissonGeneratedKernel FILE cod
       OUT_FILES Poisson.cpp Poisson.h )
 waLBerla_compile_test( FILES codegen/CodegenPoisson.cpp DEPENDS gui timeloop CodegenPoissonGeneratedKernel)
 waLBerla_execute_test( NAME CodegenPoisson )
-
-
+endif()
diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt
index c75abc941..4d732d1cf 100644
--- a/tests/lbm/CMakeLists.txt
+++ b/tests/lbm/CMakeLists.txt
@@ -71,6 +71,7 @@ waLBerla_compile_test( FILES SuViscoelasticityTest.cpp DEPENDS field blockforest
 waLBerla_execute_test( NAME  SuViscoelasticityTest COMMAND $<TARGET_FILE:SuViscoelasticityTest> ${CMAKE_CURRENT_SOURCE_DIR}/Su.prm )
 
 # Code Generation
+if( WALBERLA_BUILD_WITH_CODEGEN )
 waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated
       FILE codegen/LbCodeGenerationExample.py
       OUT_FILES LbCodeGenerationExample_LatticeModel.cpp LbCodeGenerationExample_LatticeModel.h
@@ -79,4 +80,5 @@ waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated
 waLBerla_compile_test( FILES codegen/LbCodeGenerationExample.cpp DEPENDS LbCodeGenerationExampleGenerated)
 waLBerla_generate_target_from_python(NAME FluctuatingMRTGenerated FILE codegen/FluctuatingMRT.py
                               OUT_FILES FluctuatingMRT_LatticeModel.cpp FluctuatingMRT_LatticeModel.h )
-waLBerla_compile_test( FILES codegen/FluctuatingMRT.cpp DEPENDS FluctuatingMRTGenerated)
\ No newline at end of file
+waLBerla_compile_test( FILES codegen/FluctuatingMRT.cpp DEPENDS FluctuatingMRTGenerated)
+endif()
\ No newline at end of file
-- 
GitLab