diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 9500fba899a052537ce979c60d655b22ac270609..b32f33d859188b4032e88782f986f97ddc406018 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -659,9 +659,9 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
         if isinstance(expr, vector_memory_access):
             arg, data_type, aligned, _, mask, stride = expr.args
             if stride != 1:
-                return self.instruction_set['loadS'].format("& " + self._print(arg), stride, **self._kwargs)
+                return self.instruction_set['loadS'].format(f"& {self._print(arg)}", stride, **self._kwargs)
             instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
-            return instruction.format("& " + self._print(arg), **self._kwargs)
+            return instruction.format(f"& {self._print(arg)}", **self._kwargs)
         elif isinstance(expr, cast_func):
             arg, data_type = expr.args
             if type(data_type) is VectorType:
diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py
index 15eac11db141119d9247e82f94c7f4c1fadda3e9..7144a58bdae9c69cae195039ef0a05a7365053af 100644
--- a/pystencils/cpu/cpujit.py
+++ b/pystencils/cpu/cpujit.py
@@ -373,8 +373,7 @@ def equal_size_check(fields):
         return ""
 
     ref_field = fields[0]
-    cond = ["(buffer_{field.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])".format(ref_field=ref_field,
-                                                                                            field=field_to_test, i=i)
+    cond = [f"(buffer_{field_to_test.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])"
             for field_to_test in fields[1:]
             for i in range(fields[0].spatial_dimensions)]
     cond = " && ".join(cond)
@@ -431,8 +430,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
 
                 if (np_dtype.isbuiltin and FieldType.is_generic(field)
                         and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)):
-                    dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name,
-                                                                                format=field.dtype.numpy_dtype.char)
+                    dtype_cond = f"buffer_{field.name}.format[0] == '{field.dtype.numpy_dtype.char}'"
                     pre_call_code += template_check_array.format(cond=dtype_cond, what="data type", name=field.name,
                                                                  expected=str(field.dtype.numpy_dtype))
 
@@ -461,8 +459,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
         elif param.is_field_stride:
             field = param.fields[0]
             item_size = field.dtype.numpy_dtype.itemsize
-            parameters.append("buffer_{name}.strides[{i}] / {bytes}".format(bytes=item_size, i=param.symbol.coordinate,
-                                                                            name=field.name))
+            parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}")
         elif param.is_field_shape:
             parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]")
         elif type(param.symbol) is CFunction:
@@ -507,8 +504,8 @@ def load_kernel_from_file(module_name, function_name, path):
     except ImportError:
         import time
         import warnings
-        warnings.warn("Could not load " + path + ", trying on more time...")
-        time.sleep(1)
+        warnings.warn(f"Could not load {path}, trying one more time in 5 seconds ...")
+        time.sleep(5)
         spec = spec_from_file_location(name=module_name, location=path)
         mod = module_from_spec(spec)
         spec.loader.exec_module(mod)
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index f9b35b14fbb5b1c178910ad0abc97415076a232e..c0511aa16c553d24944de5d59addab0dd5877015 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -121,8 +121,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
                                   "to differently typed floating point fields")
     float_size = field_float_dtypes.pop().numpy_dtype.itemsize
     assert float_size in (8, 4)
-    vector_is = get_vector_instruction_set('double' if float_size == 8 else 'float',
-                                           instruction_set=instruction_set)
+    default_float_type = 'double' if float_size == 8 else 'float'
+    vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
     vector_width = vector_is['width']
     kernel_ast.instruction_set = vector_is
 
@@ -130,7 +130,7 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
     keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
     vectorize_inner_loops_and_adapt_load_stores(kernel_ast, vector_width, assume_aligned, nontemporal,
                                                 strided, keep_loop_stop, assume_sufficient_line_padding)
-    insert_vector_casts(kernel_ast)
+    insert_vector_casts(kernel_ast, default_float_type)
 
 
 def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields,
@@ -242,25 +242,24 @@ def mask_conditionals(loop_body):
     visit_node(loop_body, mask=True)
 
 
-def insert_vector_casts(ast_node):
+def insert_vector_casts(ast_node, default_float_type='double'):
     """Inserts necessary casts from scalar values to vector values."""
 
     handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all)
 
-    def visit_expr(expr):
+    def visit_expr(expr, default_type='double'):
         if isinstance(expr, vector_memory_access):
-            return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4]), *expr.args[5:])
+            return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:])
         elif isinstance(expr, cast_func):
             return expr
         elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set:
-            new_arg = visit_expr(expr.args[0])
+            new_arg = visit_expr(expr.args[0], default_type)
             base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is vector_memory_access \
                 else get_type_of_expression(expr.args[0])
             pw = sp.Piecewise((-new_arg, new_arg < base_type.numpy_dtype.type(0)),
                               (new_arg, True))
-            return visit_expr(pw)
+            return visit_expr(pw, default_type)
         elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
-            default_type = 'double'
             if expr.func is sp.Mul and expr.args[0] == -1:
                 # special treatment for the unary minus: make sure that the -1 has the same type as the argument
                 dtype = int
@@ -274,7 +273,7 @@ def insert_vector_casts(ast_node):
                     if dtype is np.float32:
                         default_type = 'float'
                     expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
-            new_args = [visit_expr(a) for a in expr.args]
+            new_args = [visit_expr(a, default_type) for a in expr.args]
             arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
             if not any(type(t) is VectorType for t in arg_types):
                 return expr
@@ -285,11 +284,11 @@ def insert_vector_casts(ast_node):
                     for a, t in zip(new_args, arg_types)]
                 return expr.func(*casted_args)
         elif expr.func is sp.Pow:
-            new_arg = visit_expr(expr.args[0])
+            new_arg = visit_expr(expr.args[0], default_type)
             return expr.func(new_arg, expr.args[1])
         elif expr.func == sp.Piecewise:
-            new_results = [visit_expr(a[0]) for a in expr.args]
-            new_conditions = [visit_expr(a[1]) for a in expr.args]
+            new_results = [visit_expr(a[0], default_type) for a in expr.args]
+            new_conditions = [visit_expr(a[1], default_type) for a in expr.args]
             types_of_results = [get_type_of_expression(a) for a in new_results]
             types_of_conditions = [get_type_of_expression(a) for a in new_conditions]
 
@@ -311,14 +310,14 @@ def insert_vector_casts(ast_node):
         else:
             return expr
 
-    def visit_node(node, substitution_dict):
+    def visit_node(node, substitution_dict, default_type='double'):
         substitution_dict = substitution_dict.copy()
         for arg in node.args:
             if isinstance(arg, ast.SympyAssignment):
                 assignment = arg
                 subs_expr = fast_subs(assignment.rhs, substitution_dict,
                                       skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
-                assignment.rhs = visit_expr(subs_expr)
+                assignment.rhs = visit_expr(subs_expr, default_type)
                 rhs_type = get_type_of_expression(assignment.rhs)
                 if isinstance(assignment.lhs, TypedSymbol):
                     lhs_type = assignment.lhs.dtype
@@ -328,13 +327,13 @@ def insert_vector_casts(ast_node):
                         substitution_dict[assignment.lhs] = new_lhs
                         assignment.lhs = new_lhs
                 elif isinstance(assignment.lhs, vector_memory_access):
-                    assignment.lhs = visit_expr(assignment.lhs)
+                    assignment.lhs = visit_expr(assignment.lhs, default_type)
             elif isinstance(arg, ast.Conditional):
                 arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict,
                                                skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
-                arg.condition_expr = visit_expr(arg.condition_expr)
-                visit_node(arg, substitution_dict)
+                arg.condition_expr = visit_expr(arg.condition_expr, default_type)
+                visit_node(arg, substitution_dict, default_type)
             else:
-                visit_node(arg, substitution_dict)
+                visit_node(arg, substitution_dict, default_type)
 
-    visit_node(ast_node, {})
+    visit_node(ast_node, {}, default_float_type)
diff --git a/pystencils/data_types.py b/pystencils/data_types.py
index 295afe0897b2cbb1b2f7dfa6c4ce5ea03df9f7d9..846a46b702ad12538eb0debb109eda35b651b021 100644
--- a/pystencils/data_types.py
+++ b/pystencils/data_types.py
@@ -634,10 +634,10 @@ class BasicType(Type):
             return 'ComplexDouble'
         elif name.startswith('int'):
             width = int(name[len("int"):])
-            return "int%d_t" % (width,)
+            return f"int{width}_t"
         elif name.startswith('uint'):
             width = int(name[len("uint"):])
-            return "uint%d_t" % (width,)
+            return f"uint{width}_t"
         elif name == 'bool':
             return 'bool'
         else:
@@ -736,7 +736,7 @@ class VectorType(Type):
 
     def __str__(self):
         if self.instruction_set is None:
-            return "%s[%s]" % (self.base_type, self.width)
+            return f"{self.base_type}[{self.width}]"
         else:
             if self.base_type == create_type("int64") or self.base_type == create_type("int32"):
                 return self.instruction_set['int']
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 1bbc6a4a43b1cd7b8861243689c2d4d1f73966df..2fb216aa58df24f4c91ee0611a69e95aeb2c5f62 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -295,3 +295,4 @@ def test_issue40(*_):
 
     code = ps.get_code_str(ast)
     assert 'epi32' not in code
+
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
index 6c2f144a4273393efa5bca3b84c1aae0a840889e..b13d8bc28f4a4daf621aa23b75578a87175a826d 100644
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -6,7 +6,7 @@ import sympy as sp
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
                                                        get_vector_instruction_set)
-from pystencils.data_types import cast_func, VectorType
+from . import test_vectorization
 from pystencils.enums import Target
 
 supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
@@ -116,15 +116,33 @@ def test_cacheline_size(instruction_set):
         pytest.skip()
     instruction_set = get_vector_instruction_set('double', instruction_set)
     vector_size = instruction_set['bytes']
-    assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
+    assert 8 < cacheline_size < 0x100000, "Cache line size is implausible"
     if type(vector_size) is int:
         assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
     assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
 
 
 # test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
-from pystencils_tests import test_vectorization
-@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - set([test_vectorization.instruction_set])))
-@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
+@pytest.mark.parametrize('instruction_set',
+                         sorted(set(supported_instruction_sets) - {test_vectorization.instruction_set}))
+@pytest.mark.parametrize('function',
+                         [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
 def test_vectorization_other(instruction_set, function):
     test_vectorization.__dict__[function](instruction_set)
+
+
+@pytest.mark.parametrize('dtype', ('float', 'double'))
+@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
+@pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf'))
+def test_square_root(dtype, instruction_set, field_layout):
+    config = ps.CreateKernelConfig(data_type=dtype,
+                                   cpu_vectorize_info={'instruction_set': instruction_set,
+                                                       'assume_inner_stride_one': True,
+                                                       'assume_aligned': False, 'nontemporal': False})
+
+    src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1, layout=field_layout, index_shape=(9,))
+
+    eq = [ps.Assignment(sp.Symbol("xi"), sum(src_field.center_vector)),
+          ps.Assignment(sp.Symbol("xi_2"), sp.Symbol("xi") * sp.sqrt(src_field.center))]
+
+    ps.create_kernel(eq, config=config).compile()