Commit e31f1062 authored by Martin Bauer's avatar Martin Bauer

flake8 linter

- removed warnings
- added flake8 as CI target
parent afc933d9
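The changes below silence flake8 mainly by splitting over-long lines, removing unused imports and marking side-effect imports with "# NOQA". A minimal sketch of the NOQA pattern for a side-effect import (hypothetical snippet, needs a CUDA-capable machine with pycuda installed to actually run):

    # flake8 reports F401 ("imported but unused") for imports that exist only for
    # their side effects; a trailing "# NOQA" suppresses the check for that line.
    import pycuda.autoinit  # NOQA  -- creates the CUDA context as a side effect
    import pycuda.driver as cuda

    print(cuda.Context.get_device().name())  # the context exists, device is usable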
from pystencils.gpucuda.kernelcreation import create_cuda_kernel, created_indexed_cuda_kernel
from pystencils.gpucuda.cudajit import make_python_function
__all__ = ['create_cuda_kernel', 'created_indexed_cuda_kernel', 'make_python_function']
......@@ -19,7 +19,7 @@ def make_python_function(kernel_function_node, argument_dict=None):
Returns:
compiled kernel as Python function
"""
import pycuda.autoinit
import pycuda.autoinit # NOQA
from pycuda.compiler import SourceModule
if argument_dict is None:
......@@ -58,7 +58,7 @@ def make_python_function(kernel_function_node, argument_dict=None):
cache[key] = (args, block_and_thread_numbers)
cache_values.append(kwargs) # keep objects alive such that ids remain unique
func(*args, **block_and_thread_numbers)
#cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
# cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
wrapper.ast = kernel_function_node
wrapper.parameters = kernel_function_node.parameters
return wrapper
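The commented-out cuda.Context.synchronize() above points at a useful debugging technique: CUDA kernel launches are asynchronous, so a bad launch often only raises an error at some later, unrelated call. A hedged sketch of a launch helper that synchronizes immediately (assuming a compiled pycuda kernel func and the args/block_and_thread_numbers built by the wrapper above):

    import pycuda.driver as cuda

    def launch_checked(func, args, block_and_thread_numbers):
        """Launch a pycuda kernel and synchronize right away so that launch errors
        surface here instead of at a later CUDA call. Debugging aid only."""
        func(*args, **block_and_thread_numbers)
        cuda.Context.synchronize()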
......@@ -143,6 +143,3 @@ def _check_arguments(parameter_specification, argument_dict):
return list(index_arr_shapes)[0]
else:
return list(array_shapes)[0]
import abc
from typing import Tuple
import sympy as sp
from pystencils.astnodes import Conditional, Block
from pystencils.slicing import normalize_slice
from pystencils.data_types import TypedSymbol, create_type
......@@ -78,7 +76,7 @@ class BlockIndexing(AbstractIndexing):
if AUTO_BLOCK_SIZE_LIMITING:
block_size = self.limit_block_size_to_device_maximum(block_size)
self._blockSize = block_size
self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape)
self._dim = field.spatial_dimensions
......@@ -127,7 +125,8 @@ class BlockIndexing(AbstractIndexing):
"""
# Get device limits
import pycuda.driver as cuda
import pycuda.autoinit
# noinspection PyUnresolvedReferences
import pycuda.autoinit # NOQA
da = cuda.device_attribute
device = cuda.Context.get_device()
......@@ -181,7 +180,8 @@ class BlockIndexing(AbstractIndexing):
:returns smaller block_size if too many registers are used.
"""
import pycuda.driver as cuda
import pycuda.autoinit
# noinspection PyUnresolvedReferences
import pycuda.autoinit # NOQA
da = cuda.device_attribute
if device is None:
......
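limit_block_size_to_device_maximum relies on hardware limits that pycuda exposes as device attributes once a context exists, which is why pycuda.autoinit is imported purely for its side effect. A standalone sketch of that query, independent of BlockIndexing (runs only on a CUDA-capable machine):

    # noinspection PyUnresolvedReferences
    import pycuda.autoinit  # NOQA  -- side effect: create a CUDA context
    import pycuda.driver as cuda

    da = cuda.device_attribute
    device = cuda.Context.get_device()
    print({
        'max_threads_per_block': device.get_attribute(da.MAX_THREADS_PER_BLOCK),
        'max_block_dim_x': device.get_attribute(da.MAX_BLOCK_DIM_X),
        'max_grid_dim_x': device.get_attribute(da.MAX_GRID_DIM_X),
    })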
from functools import partial
from pystencils.gpucuda.indexing import BlockIndexing
from pystencils.transformations import resolve_field_accesses, type_all_equations, parse_base_pointer_info, get_common_shape, \
substitute_array_accesses_with_constants, resolve_buffer_accesses
from pystencils.transformations import resolve_field_accesses, type_all_equations, parse_base_pointer_info, \
get_common_shape, substitute_array_accesses_with_constants, resolve_buffer_accesses
from pystencils.astnodes import Block, KernelFunction, SympyAssignment, LoopOverCoordinate
from pystencils.data_types import TypedSymbol, BasicType, StructType
from pystencils import Field, FieldType
......@@ -39,7 +39,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
iteration_slice.append(slice(ghost_layers, -ghost_layers if ghost_layers > 0 else None))
else:
for i in range(len(common_shape)):
iteration_slice.append(slice(ghost_layers[i][0], -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None))
iteration_slice.append(slice(ghost_layers[i][0],
-ghost_layers[i][1] if ghost_layers[i][1] > 0 else None))
indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice)
......@@ -138,5 +139,3 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel
ast.indexing = indexing
ast.compile = partial(make_python_function, ast)
return ast
import sympy as sp
import numpy as np
from pystencils import Field, Assignment
from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices
......
from .kerncraft_interface import PyStencilsKerncraftKernel, KerncraftParameters
__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters']
from jinja2 import Template
from pystencils.cpu import generate_c
from pystencils.backends.cbackend import generate_c
from pystencils.sympyextensions import prod
from pystencils.data_types import get_base_type
......@@ -30,54 +30,52 @@ int main(int argc, char **argv)
{%- endif %}
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
double * {{field_name}} = aligned_malloc(sizeof({{dataType}}) * {{size}}, 32);
for (int i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
if(var_false)
dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
if(var_false)
dummy(& {{constantName}});
{%- endfor %}
int repeat = atoi(argv[1]);
{%- if likwid %}
likwid_markerStartRegion("loop");
{%- endif %}
for (; repeat > 0; --repeat)
{
{{kernelName}}({{callArgumentList}});
// Dummy calls
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
}
{%- if likwid %}
likwid_markerStopRegion("loop");
{%- endif %}
{%- if likwid %}
likwid_markerClose();
{%- endif %}
}
......
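The block above is a jinja2 template consumed by generate_benchmark; fields, constants, likwid, kernelName and callArgumentList are its template variables. A simplified, self-contained sketch of rendering such a template with toy values (not the real generate_benchmark call):

    from jinja2 import Template

    # Toy stand-in for the benchmark template: one field, no likwid markers.
    toy = Template("""
    {%- for field_name, dataType, size in fields %}
    double * {{field_name}} = aligned_malloc(sizeof({{dataType}}) * {{size}}, 32);
    {%- endfor %}
    for (; repeat > 0; --repeat)
        {{kernelName}}({{callArgumentList}});
    """)

    print(toy.render(fields=[("src", "double", 64 * 64)],
                     kernelName="kernel", callArgumentList="src"))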
......@@ -6,8 +6,6 @@ from collections import defaultdict
import subprocess
import kerncraft
import kerncraft.kernel
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, Benchmark
from kerncraft.iaca import iaca_analyse_instrumented_binary, iaca_instrumentation
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess
......@@ -94,9 +92,9 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
if '-std=c99' not in compiler_args:
compiler_args += ['-std=c99']
header_path = kerncraft.get_header_path()
compiler_cmd = [compiler] + compiler_args + ['-I' + header_path]
src_file = os.path.join(self.temporary_dir.name, "source.c")
asm_file = os.path.join(self.temporary_dir.name, "source.s")
iaca_asm_file = os.path.join(self.temporary_dir.name, "source.iaca.s")
......@@ -109,7 +107,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
f.write(generate_benchmark(self.ast, likwid=False))
# compile to asm files
subprocess.check_output(compiler_cmd + [src_file, '-S', '-o', asm_file])
subprocess.check_output(compiler_cmd + [dummy_src_file, '-S', '-o', dummy_asm_file])
with open(asm_file) as read, open(iaca_asm_file, 'w') as write:
......@@ -147,7 +145,8 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
class KerncraftParameters(DotDict):
def __init__(self):
def __init__(self, **kwargs):
super(KerncraftParameters, self).__init__(**kwargs)
self['asm_block'] = 'auto'
self['asm_increment'] = 0
self['cores'] = 1
......@@ -173,4 +172,4 @@ def search_resolved_field_accesses_in_ast(ast):
read_accesses = set()
write_accesses = set()
visit(ast, read_accesses, write_accesses)
return read_accesses, write_accesses
from types import MappingProxyType
from pystencils.assignment_collection import AssignmentCollection
from pystencils.gpucuda.indexing import indexing_creator_from_params
def create_kernel(equations, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
cpu_openmp=False, cpu_vectorize_info=None,
gpu_indexing='block', gpu_indexing_params={}):
gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
"""
Creates abstract syntax tree (AST) of kernel, using a list of update equations.
:param equations: either be a plain list of equations or a AssignmentCollection object
:param target: 'cpu', 'llvm' or 'gpu'
:param data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
to type
:param iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer part of the
field is iterated over
:param iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
part of the field is iterated over
:param ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
a single integer specifies the ghost layer count at all borders, can also be a sequence of
pairs [(x_lower_gl, x_upper_gl), .... ]
......@@ -69,7 +70,7 @@ def create_kernel(equations, target='cpu', data_type="double", iteration_slice=N
def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'),
cpu_openmp=True, gpu_indexing='block', gpu_indexing_params={}):
cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
"""
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
......@@ -97,8 +98,9 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do
raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
elif target == 'gpu':
from pystencils.gpucuda import created_indexed_cuda_kernel
ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type, coordinate_names=coordinate_names,
indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params))
idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type,
coordinate_names=coordinate_names, indexing_creator=idx_creator)
return ast
else:
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
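Several signatures in this commit replace the mutable default {} with MappingProxyType({}). The motivation is the usual Python pitfall: a mutable default argument is created once and then shared by every call that relies on it. A small toy illustration (not pystencils code):

    from types import MappingProxyType

    def bad(params={}):                       # one dict shared across all calls
        params.setdefault('n', 0)
        params['n'] += 1
        return params['n']

    def good(params=MappingProxyType({})):    # read-only default
        params = dict(params)                 # copy before mutating
        params.setdefault('n', 0)
        params['n'] += 1
        return params['n']

    print(bad(), bad())      # 1 2  -- state leaks between calls
    print(good(), good())    # 1 1  -- every call starts from a fresh copy

Callers that need to modify such a mapping first copy it, as simplify_conditionals does further down with loop_conditionals.copy().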
from .kernelcreation import create_kernel, create_indexed_kernel
from .llvmjit import compile_llvm, generate_and_jit, Jit, make_python_function
from .llvm import generate_llvm
from .kernelcreation import create_kernel
from .llvmjit import make_python_function
__all__ = ['create_kernel', 'make_python_function']
import sympy as sp
import functools
from sympy import S, Indexed
from sympy.printing.printer import Printer
......@@ -266,7 +267,7 @@ class LLVMPrinter(Printer):
phi_data = []
after_block = self.builder.append_basic_block()
for (expr, condition) in piece.args:
if condition == True: # Don't use 'is' use '=='!
if condition == sp.sympify(True): # Don't use 'is' use '=='!
phi_data.append((self._print(expr), self.builder.block))
self.builder.branch(after_block)
self.builder.position_at_end(after_block)
......
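The comparison is kept as == on purpose: sympy stores Piecewise conditions as BooleanTrue/BooleanFalse objects, so "is True" would never match, and sympifying the right-hand side makes that intent explicit instead of comparing against the Python literal True. A quick self-contained check:

    import sympy as sp

    cond = sp.sympify(True)    # sympy's BooleanTrue singleton (S.true)
    print(cond is True)        # False -- not the Python bool object
    print(cond == True)        # True  -- equality against Python bools does hold
    print(bool(cond))          # True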
......@@ -2,9 +2,6 @@ import llvmlite.ir as ir
import llvmlite.binding as llvm
import numpy as np
import ctypes as ct
import subprocess
import shutil
from pystencils.data_types import create_composite_type_from_string
from ..data_types import to_ctypes, ctypes_from_llvm
from .llvm import generate_llvm
......
from pystencils.runhelper.db import Database
from pystencils.runhelper.parameterstudy import ParameterStudy
__all__ = ['Database', 'ParameterStudy']
......@@ -231,7 +231,7 @@ def slice_intersection(slice1, slice2):
slice2 = [s if not isinstance(s, int) else slice(s, s + 1, None) for s in slice2]
new_min = [max(s1.start, s2.start) for s1, s2 in zip(slice1, slice2)]
new_max = [min(s1.stop, s2.stop) for s1, s2 in zip(slice1, slice2)]
if any(max_p - min_p < 0 for min_p, max_p in zip(new_min, new_max)):
return None
......
......@@ -506,24 +506,23 @@ def sort_assignments_topologically(assignments: Sequence[Assignment]) -> List[As
def assignments_from_python_function(func, **kwargs):
"""
Mechanism to simplify the generation of a list of sympy equations.
"""Mechanism to simplify the generation of a list of sympy equations.
Introduces a special "assignment operator" written as "@=". Each line containing this operator gives an
equation in the result list. Note that executing this function normally yields an error.
Additionally the shortcut object 'S' is available to quickly create new sympy symbols.
Example:
>>> def my_kernel(s):
... from pystencils import Field
... f = Field.create_generic('f', spatial_dimensions=2, index_dimensions=0)
... g = f.new_field_with_different_name('g')
...
... s.neighbors @= f[0,1] + f[1,0]
... g[0,0] @= s.neighbors + f[0,0]
>>> assignments_from_python_function(my_kernel)
[Assignment(neighbors, f_E + f_N), Assignment(g_C, f_C + neighbors)]
Examples:
>>> def my_kernel(s):
... from pystencils import Field
... f = Field.create_generic('f', spatial_dimensions=2, index_dimensions=0)
... g = f.new_field_with_different_name('g')
...
... s.neighbors @= f[0,1] + f[1,0]
... g[0,0] @= s.neighbors + f[0,0]
>>> assignments_from_python_function(my_kernel)
[Assignment(neighbors, f_E + f_N), Assignment(g_C, f_C + neighbors)]
"""
import inspect
import re
......
......@@ -10,9 +10,9 @@ def test_simplification_strategy():
a0, a1, a2, a3 = sp.symbols("a_:4")
subexpressions = [
Assignment(s0, 2*a + 2*b),
Assignment(s1, 2 * a + 2 * b + 2*c),
Assignment(s2, 2 * a + 2 * b + 2*c + 2*d),
Assignment(s0, 2 * a + 2 * b),
Assignment(s1, 2 * a + 2 * b + 2 * c),
Assignment(s2, 2 * a + 2 * b + 2 * c + 2 * d),
]
main = [
Assignment(a0, s0 + s1),
......
......@@ -85,7 +85,3 @@ class TimeLoop:
for f in self._timeStepFunctions:
f()
self.time_steps_run += 1
from collections import defaultdict, OrderedDict
from copy import deepcopy
from types import MappingProxyType
import sympy as sp
from sympy.logic.boolalg import Boolean
from sympy.tensor import IndexedBase
from pystencils.assignment import Assignment
from pystencils.field import Field, FieldType, offset_component_to_direction_string
from pystencils.data_types import TypedSymbol, create_type, PointerType, StructType, get_base_type, cast_func
from pystencils.data_types import TypedSymbol, PointerType, StructType, get_base_type, cast_func, \
pointer_arithmetic_func, get_type_of_expression, collate_types
from pystencils.slicing import normalize_slice
import pystencils.astnodes as ast
......@@ -232,9 +232,9 @@ def parse_base_pointer_info(base_pointer_specification, loop_order, field):
def substitute_array_accesses_with_constants(ast_node):
"""Substitutes all instances of Indexed (array accesses) that are not field accesses with constants.
Benchmarks showed that using an array access as loop bound or in pointer computations causes some compilers to perform
fewer optimizations.
This transformation should be applied after field accesses have been resolved (since they introduce array accesses) and
before constants are moved before the loops.
"""
......@@ -331,7 +331,8 @@ def resolve_buffer_accesses(ast_node, base_buffer_index, read_only_field_names=s
def resolve_field_accesses(ast_node, read_only_field_names=set(),
field_to_base_pointer_info={}, field_to_fixed_coordinates={}):
field_to_base_pointer_info=MappingProxyType({}),
field_to_fixed_coordinates=MappingProxyType({})):
"""
Substitutes :class:`pystencils.field.Field.Access` nodes by array indexing
......@@ -632,8 +633,9 @@ def simplify_boolean_expression(expr, single_variable_ranges):
return visit(expr)
def simplify_conditionals(node, loop_conditionals={}):
def simplify_conditionals(node, loop_conditionals=MappingProxyType({})):
"""Simplifies/Removes conditions inside loops that depend on the loop counter."""
loop_conditionals = loop_conditionals.copy()
if isinstance(node, ast.LoopOverCoordinate):
ctr_sym = node.loop_counter_symbol
loop_conditionals[ctr_sym] = sp.And(ctr_sym >= node.start, ctr_sym < node.stop)
......@@ -684,8 +686,8 @@ def type_all_equations(eqs, type_for_symbol):
:param eqs: list of equations
:param type_for_symbol: dict mapping symbol names to types. Types are strings of C types like 'int' or 'double'
:return: ``fields_read, fields_written, typed_equations`` set of read fields, set of written fields, list of equations
where symbols have been replaced by typed symbols
:return: ``fields_read, fields_written, typed_equations`` set of read fields, set of written fields,
list of equations where symbols have been replaced by typed symbols
"""
if isinstance(type_for_symbol, str) or not hasattr(type_for_symbol, '__getitem__'):
type_for_symbol = typing_from_sympy_inspection(eqs, type_for_symbol)
......@@ -741,6 +743,92 @@ def type_all_equations(eqs, type_for_symbol):
return fields_read, fields_written, typed_equations
def insert_casts(node):
"""Checks the types and inserts casts and pointer arithmetic where necessary
:param node: the head node of the ast
:return: modified ast
"""
def cast(zipped_args_types, target_dtype):
"""
Adds casts to the arguments if their type differs from the target type
:param zipped_args_types: a zipped list of args and types
:param target_dtype: The target data type
:return: args with possible casts
"""
casted_args = []
for argument, data_type in zipped_args_types:
if data_type.numpy_dtype != target_dtype.numpy_dtype: # ignoring const
casted_args.append(cast_func(argument, target_dtype))
else:
casted_args.append(argument)
return casted_args
def pointer_arithmetic(expr_args):
"""
Creates a valid pointer arithmetic function
:param expr_args: Arguments of the add expression
:return: pointer_arithmetic_func
"""
pointer = None
new_args = []
for arg, data_type in expr_args:
if data_type.func is PointerType:
assert pointer is None
pointer = arg
for arg, data_type in expr_args:
if arg != pointer:
assert data_type.is_int() or data_type.is_uint()
new_args.append(arg)
new_args = sp.Add(*new_args) if len(new_args) > 0 else new_args
return pointer_arithmetic_func(pointer, new_args)
if isinstance(node, sp.AtomicExpr):
return node
args = []
for arg in node.args:
args.append(insert_casts(arg))
# TODO indexed, LoopOverCoordinate
if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, sp.Lt, sp.Le, sp.Gt, sp.Ge):
# TODO optimize pow, don't cast integer on double
types = [get_type_of_expression(arg) for arg in args]
assert len(types) > 0
target = collate_types(types)
zipped = list(zip(args, types))
if target.func is PointerType:
assert node.func is sp.Add
return pointer_arithmetic(zipped)
else:
return node.func(*cast(zipped, target))
elif node.func is ast.SympyAssignment:
lhs = args[0]
rhs = args[1]
target = get_type_of_expression(lhs)
if target.func is PointerType:
return node.func(*args) # TODO fix, not complete
else:
return node.func(lhs, *cast([(rhs, get_type_of_expression(rhs))], target))
elif node.func is ast.ResolvedFieldAccess:
return node
elif node.func is ast.Block:
for old_arg, new_arg in zip(node.args, args):
node.replace(old_arg, new_arg)
return node
elif node.func is ast.LoopOverCoordinate:
for old_arg, new_arg in zip(node.args, args):
node.replace(old_arg, new_arg)
return node
elif node.func is sp.Piecewise:
expressions = [expr for (expr, _) in args]
types = [get_type_of_expression(expr) for expr in expressions]
target = collate_types(types)
zipped = list(zip(expressions, types))
casted_expressions = cast(zipped, target)
args = [arg.func(*[expr, arg.cond]) for (arg, expr) in zip(args, casted_expressions)]
return node.func(*args)
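insert_casts collates the argument types of each operation and wraps every argument whose type differs from the collated target in a cast_func. A toy, self-contained illustration of that collate-and-cast idea with plain numpy dtypes (deliberately not using the pystencils node classes):

    import numpy as np

    def collate(types):
        """Toy stand-in for collate_types: the widest dtype wins."""
        return np.result_type(*types)

    def insert_casts_toy(args_with_types):
        """Wrap every argument whose dtype differs from the collated target in a
        ('cast', target, arg) marker, mirroring the cast() helper above."""
        target = collate([t for _, t in args_with_types])
        return target, [arg if t == target else ('cast', target, arg)
                        for arg, t in args_with_types]

    target, casted = insert_casts_toy([('x', np.dtype('int64')),
                                       ('y', np.dtype('float64'))])
    print(target, casted)    # float64 [('cast', dtype('float64'), 'x'), 'y']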
# --------------------------------------- Helper Functions -------------------------------------------------------------
......
from .transformations import *
from .stage2 import *
import sympy as sp
from pystencils.data_types import PointerType, get_type_of_expression, collate_types, cast_func, pointer_arithmetic_func
import pystencils.astnodes as ast
def insert_casts(node):
"""
Checks the types and inserts casts and pointer arithmetic where necessary
:param node: the head node of the ast
:return: modified ast
"""
def cast(zipped_args_types, target_dtype):
"""
Adds casts to the arguments if their type differs from the target type
:param zipped_args_types: a zipped list of args and types
:param target_dtype: The target data type
:return: args with possible casts
"""
casted_args = []
for argument, data_type in zipped_args_types:
if data_type.numpy_dtype != target_dtype.numpy_dtype: # ignoring const
casted_args.append(cast_func(argument, target_dtype))
else:
casted_args.append(argument)
return casted_args
def pointer_arithmetic(expr_args):
"""
Creates a valid pointer arithmetic function
:param expr_args: Arguments of the add expression
:return: pointer_arithmetic_func
"""
pointer = None
new_args = []
for arg, data_type in expr_args:
if data_type.func is PointerType:
assert pointer is None
pointer = arg
for arg, data_type in expr_args:
if arg != pointer:
assert data_type.is_int() or data_type.is_uint()
new_args.append(arg)
new_args = sp.Add(*new_args) if len(new_args) > 0 else new_args
return pointer_arithmetic_func(pointer, new_args)
if isinstance(node, sp.AtomicExpr):
return node
args = []
for arg in node.args:
args.append(insert_casts(arg))
# TODO indexed, LoopOverCoordinate
if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, sp.Lt, sp.Le, sp.Gt, sp.Ge):
# TODO optimize pow, don't cast integer on double
types = [get_type_of_expression(arg) for arg in args]
assert len(types) > 0
target = collate_types(types)
zipped = list(zip(args, types))
if target.func is PointerType:
assert node.func is sp.Add
return pointer_arithmetic(zipped)
else:
return node.func(*cast(zipped, target))
elif node.func is ast.SympyAssignment:
lhs = args[0]
rhs = args[1]
target