diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py
index c95c420d6c62d4428354f934a4e36fb7e7398b9d..7abf2578ac3802ce77add553b9dca88e39ac034d 100644
--- a/pystencils/kerncraft_coupling/generate_benchmark.py
+++ b/pystencils/kerncraft_coupling/generate_benchmark.py
@@ -1,7 +1,7 @@
 import os
 import subprocess
 
-from jinja2 import Template
+from jinja2 import Environment, PackageLoader, StrictUndefined
 
 from pystencils.astnodes import PragmaBlock
 from pystencils.backends.cbackend import generate_c, get_headers
@@ -10,116 +10,6 @@ from pystencils.data_types import get_base_type
 from pystencils.include import get_pystencils_include_path
 from pystencils.sympyextensions import prod
 
-benchmark_template = Template("""
-#include "kerncraft.h"
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <math.h>
-#include <stdio.h>
-
-{{ includes }}
-
-{%- if likwid %}
-#include <likwid.h>
-{%- endif %}
-
-#define RESTRICT __restrict__
-#define FUNC_PREFIX
-void dummy(void *);
-void timing(double* wcTime, double* cpuTime);
-extern int var_false;
-
-
-{{kernel_code}}
-
-
-int main(int argc, char **argv)
-{
-  {%- if likwid %}
-  likwid_markerInit();
-  {%- endif %}
-
-  {%- for field_name, dataType, size in fields %}
-
-  // Initialization {{field_name}}
-  double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
-  for (unsigned long long i = 0; i < {{size}}; ++i)
-    {{field_name}}[i] = 0.23;
-
-  if(var_false)
-    dummy({{field_name}});
-
-  {%- endfor %}
-
-
-
-  {%- for constantName, dataType in constants %}
-
-  // Constant {{constantName}}
-  {{dataType}} {{constantName}};
-  {{constantName}} = 0.23;
-  if(var_false)
-      dummy(& {{constantName}});
-
-  {%- endfor %}
-
-  {%- if likwid and openmp %}
-  #pragma omp parallel
-  {
-  likwid_markerRegisterRegion("loop");
-  #pragma omp barrier
-  {%- elif likwid %}
-  likwid_markerRegisterRegion("loop");
-  {%- endif %}
-
-  for(int warmup = 1; warmup >= 0; --warmup) {
-    int repeat = 2;
-    if(warmup == 0) {
-      repeat = atoi(argv[1]);
-      {%- if likwid %}
-      likwid_markerStartRegion("loop");
-      {%- endif %}
-    }
-    
-    {%- if timing %}
-    double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
-    timing(&wcStartTime, &cpuStartTime);
-    {%- endif %}
-    
-    for (; repeat > 0; --repeat)
-    {
-      {{kernelName}}({{call_argument_list}});
-
-      // Dummy calls
-      {%- for field_name, dataType, size in fields %}
-      if(var_false) dummy((void*){{field_name}});
-      {%- endfor %}
-      {%- for constantName, dataType in constants %}
-      if(var_false) dummy((void*)&{{constantName}});
-      {%- endfor %}
-    }
-    {%- if timing %}
-    timing(&wcEndTime, &cpuEndTime);
-    if( warmup == 0)
-        printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
-    {%- endif %}
-
-  }
-
-  {%- if likwid %}
-  likwid_markerStopRegion("loop");
-  {%- if openmp %}
-  }
-  {%- endif %}
-  {%- endif %}
-
-  {%- if likwid %}
-  likwid_markerClose();
-  {%- endif %}
-}
-""")
-
 
 def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
     """Return C code of a benchmark program for the given kernel.
@@ -157,7 +47,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
         if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
             ast.body.args[0].pragma_line = ''
 
-    args = {
+    jinja_context = {
         'likwid': likwid,
         'openmp': openmp,
         'kernel_code': generate_c(ast, dialect='c'),
@@ -168,7 +58,10 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
         'includes': includes,
         'timing': timing,
     }
-    return benchmark_template.render(**args)
+
+    env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+
+    return env.get_template('benchmark.c').render(**jinja_context)
 
 
 def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
diff --git a/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/kerncraft_coupling/kerncraft_interface.py
index 26cd281f09aaa907750aee5dc92b46282a142710..d92bd67dae601dd43906aa43ab695bc185f3b42f 100644
--- a/pystencils/kerncraft_coupling/kerncraft_interface.py
+++ b/pystencils/kerncraft_coupling/kerncraft_interface.py
@@ -1,8 +1,11 @@
 import warnings
+import fcntl
 from collections import defaultdict
 from tempfile import TemporaryDirectory
 from typing import Optional
 
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
 import kerncraft
 import sympy as sp
 from kerncraft.kerncraft import KernelCode
@@ -15,6 +18,7 @@ from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
 from pystencils.sympyextensions import count_operations_in_ast
 from pystencils.transformations import filtered_tree_iteration
 from pystencils.utils import DotDict
+from pystencils.backends.cbackend import generate_c, get_headers
 
 
 class PyStencilsKerncraftKernel(KernelCode):
@@ -129,24 +133,70 @@ class PyStencilsKerncraftKernel(KernelCode):
             print("-----------------------------  FLOPS -------------------------------")
             pprint(self._flops)
 
-    def as_code(self, type_='iaca', openmp=False, as_filename=False):
+    def get_kernel_header(self, name='pystencils_kernel'):
+        """Create the kernel header in the intermediate location, or reuse the cached file.
+
+        Args:
+            name: not referenced in this body; the file is always "pystencils_kernel.h"
+
+        Returns:
+            tuple ``(file_path, lock_fp)``: header path and the lock object from ``lock_intermediate``
+        """
+        header_path = self.get_intermediate_location("pystencils_kernel.h",
+                                                     machine_and_compiler_dependent=False)
+        lock_mode, lock_fp = self.lock_intermediate(header_path)
+
+        if lock_mode != fcntl.LOCK_SH:  # not shared, i.e. fcntl.LOCK_EX: the header has to be written
+            signature = generate_c(self.kernel_ast, dialect='c', signature_only=True)
+            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+            rendered = env.get_template('kernel.h').render(function_signature=signature)
+            with open(header_path, 'w') as f:
+                f.write(rendered)
+            fcntl.flock(lock_fp, fcntl.LOCK_SH)  # degrade to shared lock
+        else:  # shared lock: use cache
+            with open(header_path) as f:
+                code = f.read()  # kept from the original flow; the content is not used afterwards
+        return header_path, lock_fp
+
+    def get_kernel_code(self, openmp=False, name='pystencils_kernl'):
         """
-        Generate and return compilable source code.
+        Write the kernel source file to the intermediate location, or reuse the cached file.
 
         Args:
-            type_: can be iaca or likwid.
-            openmp: if true, openmp code will be generated
-            as_filename:
+            openmp: if true, '-omp' is appended to the file name; not passed to generate_c here
+            name: not referenced in this body; the file name is built from 'pystencils_kernl'
+        Returns: tuple (file_path, lock_fp), i.e. source path and the lock object from lock_intermediate
         """
-        code = generate_benchmark(self.kernel_ast, likwid=type_ == 'likwid', openmp=openmp)
-        if as_filename:
-            fp, already_available = self._get_intermediate_file(f'kernel_{type_}.c',
-                                                                machine_and_compiler_dependent=False)
-            if not already_available:
-                fp.write(code)
-            return fp.name
-        else:
-            return code
+        filename = 'pystencils_kernl'  # NOTE(review): spelled without the second 'e' - confirm intended
+        if openmp:
+            filename += '-omp'
+        filename += '.c'
+        file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
+        lock_mode, lock_fp = self.lock_intermediate(file_path)
+
+        if lock_mode == fcntl.LOCK_SH:
+            # use cache
+            with open(file_path) as f:
+                code = f.read()  # NOTE(review): read but not used afterwards
+        else:  # lock_mode == fcntl.LOCK_EX
+            header_list = get_headers(self.kernel_ast)
+            includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
+
+            kernel_code = generate_c(self.kernel_ast, dialect='c')
+
+            jinja_context = {
+                'includes': includes,
+                'kernel_code': kernel_code,
+            }
+
+            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+            file_header = env.get_template('kernel.c').render(**jinja_context)  # rendered 'kernel.c' source
+            with open(file_path, 'w') as f:
+                f.write(file_header)
+
+            fcntl.flock(lock_fp, fcntl.LOCK_SH)  # degrade to shared lock
+
+        return file_path, lock_fp
 
 
 class KerncraftParameters(DotDict):
diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..2cda6787bc423e5b6505ef9abc74b77825474541
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/benchmark.c
@@ -0,0 +1,108 @@
+
+#include "kerncraft.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdio.h>
+
+{{ includes }}
+
+{%- if likwid %}
+#include <likwid.h>
+{%- endif %}
+
+#define RESTRICT __restrict__
+#define FUNC_PREFIX
+void dummy(void *);
+void timing(double* wcTime, double* cpuTime);
+extern int var_false;
+
+
+{{kernel_code}}
+
+
+int main(int argc, char **argv){# benchmark driver: fills fields and constants, runs 2 warm-up calls, then calls the kernel argv[1] times #}
+{
+  {%- if likwid %}
+  likwid_markerInit();
+  {%- endif %}
+
+  {%- for field_name, dataType, size in fields %}
+
+  // Initialization {{field_name}}
+  {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);{# pointer typed by dataType so the element width matches the sizeof() used for the allocation #}
+  for (unsigned long long i = 0; i < {{size}}; ++i)
+    {{field_name}}[i] = 0.23;
+
+  if(var_false)
+    dummy({{field_name}});
+
+  {%- endfor %}
+
+
+
+  {%- for constantName, dataType in constants %}
+
+  // Constant {{constantName}}
+  {{dataType}} {{constantName}};
+  {{constantName}} = 0.23;
+  if(var_false)
+      dummy(& {{constantName}});
+
+  {%- endfor %}
+
+  {%- if likwid and openmp %}
+  #pragma omp parallel
+  {
+  likwid_markerRegisterRegion("loop");
+  #pragma omp barrier
+  {%- elif likwid %}
+  likwid_markerRegisterRegion("loop");
+  {%- endif %}
+
+  for(int warmup = 1; warmup >= 0; --warmup) {
+    int repeat = 2;
+    if(warmup == 0) {
+      repeat = atoi(argv[1]);
+      {%- if likwid %}
+      likwid_markerStartRegion("loop");
+      {%- endif %}
+    }
+    
+    {%- if timing %}
+    double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
+    timing(&wcStartTime, &cpuStartTime);
+    {%- endif %}
+    
+    for (; repeat > 0; --repeat)
+    {
+      {{kernelName}}({{call_argument_list}});
+
+      // Dummy calls
+      {%- for field_name, dataType, size in fields %}
+      if(var_false) dummy((void*){{field_name}});
+      {%- endfor %}
+      {%- for constantName, dataType in constants %}
+      if(var_false) dummy((void*)&{{constantName}});
+      {%- endfor %}
+    }
+    {%- if timing %}
+    timing(&wcEndTime, &cpuEndTime);
+    if( warmup == 0)
+        printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );{# single backslash: template text reaches the C compiler verbatim, no Python string unescaping #}
+    {%- endif %}
+
+  }
+
+  {%- if likwid %}
+  likwid_markerStopRegion("loop");
+  {%- if openmp %}
+  }
+  {%- endif %}
+  {%- endif %}
+
+  {%- if likwid %}
+  likwid_markerClose();
+  {%- endif %}
+}
diff --git a/pystencils/kerncraft_coupling/templates/kernel.c b/pystencils/kerncraft_coupling/templates/kernel.c
new file mode 100644
index 0000000000000000000000000000000000000000..47fbf7cf25eda318a8fcecffa1477f5738eb1abc
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/kernel.c
@@ -0,0 +1,18 @@
+
+#include "kerncraft.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdio.h>
+
+{{ includes }}
+
+#define RESTRICT __restrict__
+#define FUNC_PREFIX
+void dummy(void *);
+void timing(double* wcTime, double* cpuTime);
+extern int var_false;
+
+
+{{kernel_code}}
\ No newline at end of file
diff --git a/pystencils/kerncraft_coupling/templates/kernel.h b/pystencils/kerncraft_coupling/templates/kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..539d51f928ca5d702dc2dad8de7396a505e5c5ee
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/kernel.h
@@ -0,0 +1,3 @@
+#define FUNC_PREFIX
+
+{{function_signature}}
\ No newline at end of file
diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py
index 2b177ea3807bd7490fa3d5a29c35ccd628b0ab2c..ac7ed5de65ef65298ac715382c5b780ee9185178 100644
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
@@ -4,6 +4,8 @@ import numpy as np
 import pytest
 import sympy as sp
 import kerncraft
+from kerncraft.kernel import KernelCode
+from kerncraft.machinemodel import MachineModel
 
 from pystencils import Assignment, Field
 from pystencils.cpu import create_kernel
@@ -17,11 +19,11 @@ INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs")
 @pytest.mark.kerncraft
 def test_compilation():
     machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    machine = MachineModel(path_to_yaml=machine_file_path)
 
     kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
         reference_kernel.get_kernel_header(name='test_kernel')
         reference_kernel.get_kernel_code(name='test_kernel')
         reference_kernel.get_main_code(kernel_function_name='test_kernel')
@@ -41,7 +43,7 @@ def test_compilation():
 @pytest.mark.kerncraft
 def analysis(kernel, model='ecmdata'):
     machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    machine = MachineModel(path_to_yaml=machine_file_path)
     if model == 'ecmdata':
         model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters())
     elif model == 'ecm':
@@ -62,9 +64,9 @@ def test_3d_7pt_OSACA():
     size = [20, 200, 200]
     kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
     machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    machine = MachineModel(path_to_yaml=machine_file_path)
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
     reference_kernel.set_constant('M', size[0])
     reference_kernel.set_constant('N', size[1])
     assert size[1] == size[2]
@@ -89,7 +91,7 @@ def test_2d_5pt():
     size = [30, 50, 3]
     kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
     reference = analysis(reference_kernel)
 
     arr = np.zeros(size)
@@ -111,7 +113,7 @@ def test_3d_7pt():
     size = [30, 50, 50]
     kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
     reference_kernel.set_constant('M', size[0])
     reference_kernel.set_constant('N', size[1])
     assert size[1] == size[2]