Commit 0ed1a87b authored by Jan Hönig's avatar Jan Hönig
Browse files

Merge branch 'RemoveLLVM' into 'master'

Remove LLVM

See merge request pycodegen/pystencils!276
parents ddad5daf 901880d0
Pipeline #40372 failed with stages
in 6 minutes and 45 seconds
# Change Log
## Unreleased
### Removed
* LLVM backend because it was not used much and not good integrated in pystencils.
......@@ -2,6 +2,7 @@ include
include COPYING.txt
include AUTHORS.txt
global-include *.pyx
include pystencils/
......@@ -60,7 +60,6 @@ All options:
- `autodiff`: enable derivation of adjoint kernels and generation of Torch/Tensorflow operations
- `doc`: packages to build documentation
- `kerncraft`: use kerncraft for automatic performance analysis
- `llvm_jit`: llvmlite as additional CPU backend
Options can be combined e.g.
......@@ -73,12 +72,12 @@ Documentation
Read the docs [here]( and
check out the Jupyter notebooks in `doc/notebooks`.
check out the Jupyter notebooks in `doc/notebooks`. The **Changelog** of pystencils can be found [here](
Many thanks go to the [contributors](AUTHORS.txt) of pystencils.
Many thanks go to the [contributors]( of pystencils.
### Please cite us
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,9 +6,3 @@ try:
except ImportError:
from .llvm import generate_llvm # NOQA
except ImportError:
......@@ -138,22 +138,11 @@ def create_folder(path, is_file):
def get_llc_command():
"""Try to get executable for llvm's IR compiler llc
We try if one of the following is in PATH: llc, llc-10, llc-9, llc-8, llc-7, llc-6
candidates = ['llc', 'llc-10', 'llc-9', 'llc-8', 'llc-7', 'llc-6']
found_executables = (e for e in candidates if shutil.which(e))
return next(found_executables, None)
def read_config():
if platform.system().lower() == 'linux':
default_compiler_config = OrderedDict([
('os', 'linux'),
('command', 'g++'),
('llc_command', get_llc_command() or 'llc'),
('flags', '-Ofast -DNDEBUG -fPIC -march=native -fopenmp -std=c++11'),
('restrict_qualifier', '__restrict__')
......@@ -164,7 +153,6 @@ def read_config():
default_compiler_config = OrderedDict([
('os', 'windows'),
('msvc_version', 'latest'),
('llc_command', get_llc_command() or 'llc'),
('arch', 'x64'),
('flags', '/Ox /fp:fast /OpenMP /arch:avx'),
('restrict_qualifier', '__restrict')
......@@ -173,7 +161,6 @@ def read_config():
default_compiler_config = OrderedDict([
('os', 'darwin'),
('command', 'clang++'),
('llc_command', get_llc_command() or 'llc'),
('flags', '-Ofast -DNDEBUG -fPIC -march=native -Xclang -fopenmp -std=c++11'),
('restrict_qualifier', '__restrict__')
......@@ -13,12 +13,6 @@ import pystencils
from pystencils.cache import memorycache, memorycache_if_hashable
from pystencils.utils import all_equal
import as ir
except ImportError as e:
ir = None
_ir_importerror = e
def typed_symbols(names, dtype, *args):
symbols = sp.symbols(names, *args)
......@@ -373,67 +367,6 @@ = {
def ctypes_from_llvm(data_type):
if not ir:
raise _ir_importerror
if isinstance(data_type, ir.PointerType):
ctype = ctypes_from_llvm(data_type.pointee)
if ctype is None:
return ctypes.c_void_p
return ctypes.POINTER(ctype)
elif isinstance(data_type, ir.IntType):
if data_type.width == 8:
return ctypes.c_int8
elif data_type.width == 16:
return ctypes.c_int16
elif data_type.width == 32:
return ctypes.c_int32
elif data_type.width == 64:
return ctypes.c_int64
raise ValueError("Int width %d is not supported" % data_type.width)
elif isinstance(data_type, ir.FloatType):
return ctypes.c_float
elif isinstance(data_type, ir.DoubleType):
return ctypes.c_double
elif isinstance(data_type, ir.VoidType):
return None # Void type is not supported by ctypes
raise NotImplementedError(f'Data type {type(data_type)} of {data_type} is not supported yet')
def to_llvm_type(data_type, nvvm_target=False):
Transforms a given type into ctypes
:param data_type: Subclass of Type
:return: llvmlite type object
if not ir:
raise _ir_importerror
if isinstance(data_type, PointerType):
return to_llvm_type(data_type.base_type).as_pointer(1 if nvvm_target else 0)
if ir: = {
np.dtype(np.int8): ir.IntType(8),
np.dtype(np.int16): ir.IntType(16),
np.dtype(np.int32): ir.IntType(32),
np.dtype(np.int64): ir.IntType(64),
np.dtype(np.uint8): ir.IntType(8),
np.dtype(np.uint16): ir.IntType(16),
np.dtype(np.uint32): ir.IntType(32),
np.dtype(np.uint64): ir.IntType(64),
np.dtype(np.float32): ir.FloatType(),
np.dtype(np.float64): ir.DoubleType(),
def peel_off_type(dtype, type_to_peel_off):
while type(dtype) is type_to_peel_off:
dtype = dtype.base_type
......@@ -28,11 +28,6 @@ class Backend(Enum):
Use the C Backend of pystencils.
LLVM = auto()
Use the ``llvmlite`` package to transform the pystensilc AST to the LLVM ir.
From this point all of LLVMs optimisations can be used.
CUDA = auto()
Use the CUDA backend to generate code for NVIDIA GPUs.
......@@ -274,11 +274,6 @@ def create_domain_kernel(assignments: List[Assignment], *, config: CreateKernelC
raise ValueError("Blocking cannot be combined with cacheline-zeroing")
raise ValueError("Invalid value for cpu_vectorize_info")
elif config.backend == Backend.LLVM:
from pystencils.llvm import create_kernel
ast = create_kernel(assignments, function_name=config.function_name, type_info=config.data_type,
split_groups=split_groups, iteration_slice=config.iteration_slice,
elif == Target.GPU or == Target.OPENCL:
if config.backend == Backend.CUDA or config.backend == Backend.OPENCL:
from pystencils.gpucuda import create_cuda_kernel
from .kernelcreation import create_kernel
from .llvmjit import make_python_function
__all__ = ['create_kernel', 'make_python_function']
import as ir
class Loop(object):
def __init__(self, builder, start_val, stop_val, step_val=1, loop_name='loop', phi_name="_phi"):
self.builder = builder
self.start_val = start_val
self.stop_val = stop_val
self.step_val = step_val
self.loop_name = loop_name
self.phi_name = phi_name
def __enter__(self):
self.loop_end, self.after, phi = self._for_loop(self.start_val, self.stop_val, self.step_val, self.loop_name,
return phi
def _for_loop(self, start_val, stop_val, step_val, loop_name, phi_name):
# TODO size of int??? unisgned???
integer = ir.IntType(64)
# Loop block
pre_loop_bb = self.builder.block
loop_bb = self.builder.append_basic_block(name='loop_' + loop_name)
# Insert an explicit fall through from the current block to loop_bb
# Add phi
phi = self.builder.phi(integer, name=phi_name)
phi.add_incoming(start_val, pre_loop_bb)
loop_end_bb = self.builder.append_basic_block(name=loop_name + "_end_bb")
next_var = self.builder.add(phi, step_val, name=loop_name + '_next_it')
cond = self.builder.icmp_unsigned('<', next_var, stop_val, name=loop_name + "_cond")
after_bb = self.builder.append_basic_block(name=loop_name + "_after_bb")
self.builder.cbranch(cond, loop_bb, after_bb)
phi.add_incoming(next_var, loop_end_bb)
return loop_end_bb, after_bb, phi
def __exit__(self, exc_type, exc, exc_tb):
from pystencils.enums import Target, Backend
from pystencils.llvm.llvmjit import make_python_function
from pystencils.transformations import insert_casts
def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
iteration_slice=None, ghost_layers=None, target=Target.CPU):
Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
Loops are created according to the field accesses in the equations.
assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
Defining the update rules of the kernel
function_name: name of the generated function - only important if generated code is written out
type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
be of type 'double' except symbols which occur on the left hand side of equations where the
right hand side is a sympy Boolean which are assumed to be 'bool' .
split_groups: Specification on how to split up inner loop into multiple loops. For details see
transformation :func:`pystencils.transformation.split_inner_loop`
iteration_slice: if not None, iteration is done only over this slice of the field
ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
if None, the number of ghost layers is determined automatically and assumed to be equal for a
all dimensions
:return: :class:`pystencils.ast.KernelFunction` node
if target == Target.CPU:
from pystencils.cpu import create_kernel
code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers)
elif target == Target.GPU:
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
code = create_cuda_kernel(assignments,
NotImplementedError(f'{target} is not implemented for LLVM kernel creation')
code._backend = Backend.LLVM
code.body = insert_casts(code.body)
code._compile_function = make_python_function
return code
import functools
import as ir
import llvmlite.llvmpy.core as lc
import sympy as sp
from sympy import Indexed, S
from sympy.printing.printer import Printer
from pystencils import Target
from pystencils.assignment import Assignment
from pystencils.data_types import (
collate_types, create_composite_type_from_string, create_type, get_type_of_expression,
from pystencils.llvm.control_flow import Loop
# From Numba
def set_cuda_kernel(lfunc):
from llvmlite.llvmpy.core import MetaData, MetaDataString, Constant, Type
m = lfunc.module
ops = lfunc, MetaDataString.get(m, "kernel"),, 1)
md = MetaData.get(m, ops)
nmd = m.get_or_insert_named_metadata('nvvm.annotations')
# set nvvm ir version
i32 = ir.IntType(32)
md_ver = m.add_metadata([i32(1), i32(2), i32(2), i32(0)])
m.add_named_metadata('nvvmir.version', md_ver)
# From Numba
def _call_sreg(builder, name):
module = builder.module
fnty = lc.Type.function(, ())
fn = module.get_or_insert_function(fnty, name=name)
return, ())
def generate_llvm(ast_node, module=None, builder=None, target=Target.CPU):
"""Prints the ast as llvm code."""
if module is None:
module = lc.Module()
if builder is None:
builder = ir.IRBuilder()
printer = LLVMPrinter(module, builder, target=target)
return printer._print(ast_node)
# noinspection PyPep8Naming
class LLVMPrinter(Printer):
"""Convert expressions to LLVM IR"""
def __init__(self, module, builder, fn=None, target=Target.CPU, *args, **kwargs):
self.func_arg_map = kwargs.pop("func_arg_map", {})
super(LLVMPrinter, self).__init__(*args, **kwargs)
self.fp_type = ir.DoubleType()
self.fp_pointer = self.fp_type.as_pointer()
self.integer = ir.IntType(64)
self.integer_pointer = self.integer.as_pointer()
self.void = ir.VoidType()
self.module = module
self.builder = builder
self.fn = fn
self.ext_fn = {} # keep track of wrappers to external functions
self.tmp_var = {} = target
def _add_tmp_var(self, name, value):
self.tmp_var[name] = value
def _remove_tmp_var(self, name):
del self.tmp_var[name]
def _print_Number(self, n):
if get_type_of_expression(n) == create_type("int"):
return ir.Constant(self.integer, int(n))
elif get_type_of_expression(n) == create_type("double"):
return ir.Constant(self.fp_type, float(n))
raise NotImplementedError("Numbers can only have int and double", n)
def _print_Float(self, expr):
return ir.Constant(self.fp_type, float(expr))
def _print_Integer(self, expr):
return ir.Constant(self.integer, int(expr))
def _print_int(self, i):
return ir.Constant(self.integer, i)
def _print_Symbol(self, s):
val = self.tmp_var.get(s)
if not val:
# look up parameter with name s
val = self.func_arg_map.get(
if not val:
raise LookupError(f"Symbol not found: {s}")
return val
def _print_Pow(self, expr):
base0 = self._print(expr.base)
if expr.exp == S.NegativeOne:
return self.builder.fdiv(ir.Constant(self.fp_type, 1.0), base0)
if expr.exp == S.Half:
fn = self.ext_fn.get("sqrt")
if not fn:
fn_type = ir.FunctionType(self.fp_type, [self.fp_type])
fn = ir.Function(self.module, fn_type, "sqrt")
self.ext_fn["sqrt"] = fn
return, [base0], "sqrt")
if expr.exp == 2:
return self.builder.fmul(base0, base0)
elif expr.exp == 3:
return self.builder.fmul(self.builder.fmul(base0, base0), base0)
exp0 = self._print(expr.exp)
fn = self.ext_fn.get("pow")
if not fn:
fn_type = ir.FunctionType(self.fp_type, [self.fp_type, self.fp_type])
fn = ir.Function(self.module, fn_type, "pow")
self.ext_fn["pow"] = fn
return, [base0, exp0], "pow")
def _print_Mul(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
if get_type_of_expression(expr) == create_type('double'):
mul = self.builder.fmul
else: # int TODO unsigned/signed
mul = self.builder.mul
for node in nodes[1:]:
e = mul(e, node)
return e
def _print_Add(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
if get_type_of_expression(expr) == create_type('double'):
add = self.builder.fadd
else: # int TODO unsigned/signed
add = self.builder.add
for node in nodes[1:]:
e = add(e, node)
return e
def _print_Or(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
for node in nodes[1:]:
e = self.builder.or_(e, node)
return e
def _print_And(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
for node in nodes[1:]:
e = self.builder.and_(e, node)
return e
def _print_StrictLessThan(self, expr):
return self._comparison('<', expr)
def _print_LessThan(self, expr):
return self._comparison('<=', expr)
def _print_StrictGreaterThan(self, expr):
return self._comparison('>', expr)
def _print_GreaterThan(self, expr):
return self._comparison('>=', expr)
def _print_Unequality(self, expr):
return self._comparison('!=', expr)
def _print_Equality(self, expr):
return self._comparison('==', expr)
def _comparison(self, cmpop, expr):
if collate_types([get_type_of_expression(arg) for arg in expr.args]) == create_type('double'):
comparison = self.builder.fcmp_unordered
comparison = self.builder.icmp_signed
return comparison(cmpop, self._print(expr.lhs), self._print(expr.rhs))
def _print_KernelFunction(self, func):
# KernelFunction does not posses a return type
return_type = self.void
parameter_type = []
parameters = func.get_parameters()
for parameter in parameters:
parameter_type.append(to_llvm_type(parameter.symbol.dtype, == Target.GPU))
func_type = ir.FunctionType(return_type, tuple(parameter_type))
name = func.function_name
fn = ir.Function(self.module, func_type, name)
self.ext_fn[name] = fn
# set proper names to arguments
for i, arg in enumerate(fn.args): = parameters[i]
self.func_arg_map[parameters[i]] = arg
# func.attributes.add("inlinehint")
# func.attributes.add("argmemonly")
block = fn.append_basic_block(name="entry")
self.builder = ir.IRBuilder(block) # TODO use goto_block instead
self.fn = fn
if == Target.GPU:
return fn
def _print_Block(self, block):
for node in block.args:
def _print_LoopOverCoordinate(self, loop):
with Loop(self.builder, self._print(loop.start), self._print(loop.stop), self._print(loop.step),
loop.loop_counter_name, as i:
self._add_tmp_var(loop.loop_counter_symbol, i)
def _print_SympyAssignment(self, assignment):
expr = self._print(assignment.rhs)
lhs = assignment.lhs
if isinstance(lhs, Indexed):
ptr = self._print(lhs.base.label)
index = self._print(lhs.args[1])
gep = self.builder.gep(ptr, [index])
return, gep)
self.func_arg_map[] = expr
return expr
def _print_boolean_cast_func(self, conversion):
return self._print_cast_func(conversion)
def _print_cast_func(self, conversion):
node = self._print(conversion.args[0])
to_dtype = get_type_of_expression(conversion)
from_dtype = get_type_of_expression(conversion.args[0])
if from_dtype == to_dtype:
return self._print(conversion.args[0])
# (From, to)
decision = {
create_composite_type_from_string("int64")): functools.partial(self.builder.zext, node, self.integer),
create_composite_type_from_string("int64")): functools.partial(self.builder.zext, node, self.integer),
create_composite_type_from_string("double")): functools.partial(self.builder.sitofp, node, self.fp_type),
create_composite_type_from_string("double")): functools.partial(self.builder.sitofp, node, self.fp_type),
create_composite_type_from_string("int")): functools.partial(self.builder.fptosi, node, self.integer),
(create_composite_type_from_string("double *"),
create_composite_type_from_string("int")): functools.partial(self.builder.ptrtoint, node, self.integer),
create_composite_type_from_string("double *")): functools.partial(self.builder.inttoptr,
node, self.fp_pointer),
(create_composite_type_from_string("double * restrict"),
create_composite_type_from_string("int")): functools.partial(self.builder.ptrtoint, node, self.integer),
create_composite_type_from_string("double * restrict")): functools.partial(self.builder.inttoptr, node,
(create_composite_type_from_string("double * restrict const"),
create_composite_type_from_string("int")): functools.partial(self.builder.ptrtoint, node,
create_composite_type_from_string("double * restrict const")): functools.partial(self.builder.inttoptr,
node, self.fp_pointer),
# TODO float, TEST: const, restrict
# TODO bitcast, addrspacecast
# TODO unsigned/signed fills
# print([x for x in decision.keys()])
# print("Types:")
# print([(type(x), type(y)) for (x, y) in decision.keys()])
# print("Cast:")
# print((from_dtype, to_dtype))
return decision[(from_dtype, to_dtype)]()
def _print_pointer_arithmetic_func(self, pointer):
ptr = self._print(pointer.args[0])
index = self._print(pointer.args[1])
return self.builder.