From e31f1062fcdc6d103d1e6f12ae220914399d3813 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Fri, 13 Apr 2018 17:16:10 +0200 Subject: [PATCH] flake8 linter - removed warnings - added flake8 as CI target --- __init__.py | 2 +- assignment_collection/simplifications.py | 2 +- .../simplificationstrategy.py | 2 +- astnodes.py | 5 +- backends/__init__.py | 11 +- backends/cbackend.py | 5 +- backends/dot.py | 1 - backends/simd_instruction_sets.py | 2 +- boundaries/__init__.py | 2 + boundaries/boundaryhandling.py | 16 +-- boundaries/createindexlist.py | 4 +- cpu/__init__.py | 3 +- cpu/cpujit.py | 4 +- cpu/kernelcreation.py | 6 +- datahandling/datahandling_interface.py | 2 +- datahandling/serial_datahandling.py | 15 ++- derivative.py | 8 +- display_utils.py | 2 +- field.py | 7 +- finitedifferences.py | 17 ++- gpucuda/__init__.py | 2 + gpucuda/cudajit.py | 7 +- gpucuda/indexing.py | 10 +- gpucuda/kernelcreation.py | 9 +- gpucuda/periodicity.py | 1 - kerncraft_coupling/__init__.py | 2 + kerncraft_coupling/generate_benchmark.py | 44 ++++--- kerncraft_coupling/kerncraft_interface.py | 13 +-- kernelcreation.py | 14 ++- llvm/__init__.py | 7 +- llvm/llvm.py | 3 +- llvm/llvmjit.py | 3 - runhelper/__init__.py | 2 + slicing.py | 2 +- sympyextensions.py | 29 +++-- test_simplification_strategy.py | 6 +- timeloop.py | 4 - .../transformations.py => transformations.py | 108 ++++++++++++++++-- transformations/__init__.py | 2 - transformations/stage2.py | 89 --------------- vectorization.py | 1 - 41 files changed, 240 insertions(+), 234 deletions(-) rename transformations/transformations.py => transformations.py (89%) delete mode 100644 transformations/__init__.py delete mode 100644 transformations/stage2.py diff --git a/__init__.py b/__init__.py index 05e5ab6ed..dc7ee7421 100644 --- a/__init__.py +++ b/__init__.py @@ -1,5 +1,5 @@ """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions""" -from . import sympy_gmpy_bug_workaround +from . import sympy_gmpy_bug_workaround # NOQA from .field import Field, FieldType from .data_types import TypedSymbol from .slicing import make_slice diff --git a/assignment_collection/simplifications.py b/assignment_collection/simplifications.py index 6e7173f13..6d6de1414 100644 --- a/assignment_collection/simplifications.py +++ b/assignment_collection/simplifications.py @@ -98,4 +98,4 @@ def apply_on_all_subexpressions(operation: Callable[[sp.Expr], sp.Expr]) -> Call result = [Assignment(eq.lhs, operation(eq.rhs)) for eq in ac.subexpressions] return ac.copy(ac.main_assignments, result) f.__name__ = operation.__name__ - return f \ No newline at end of file + return f diff --git a/assignment_collection/simplificationstrategy.py b/assignment_collection/simplificationstrategy.py index a66fcd2cd..bfd88f2ef 100644 --- a/assignment_collection/simplificationstrategy.py +++ b/assignment_collection/simplificationstrategy.py @@ -84,7 +84,7 @@ class SimplificationStrategy(object): report = Report() op = assignment_collection.operation_count total = op['adds'] + op['muls'] + op['divs'] - report.add(ReportElement("OriginalTerm", '-', op['adds'], op['muls'], op['divs'], total)) + report.add(ReportElement("OriginalTerm", '-', op['adds'], op['muls'], op['divs'], total)) for t in self._rules: start_time = timeit.default_timer() assignment_collection = t(assignment_collection) diff --git a/astnodes.py b/astnodes.py index 54eaafffb..ebd0c82ec 100644 --- a/astnodes.py +++ b/astnodes.py @@ -60,7 +60,8 @@ class Conditional(Node): false_block: optional block which is run if conditional is false """ - def __init__(self, condition_expr: sp.Basic, true_block: Union['Block', 'SympyAssignment'], false_block: Optional['Block'] = None) -> None: + def __init__(self, condition_expr: sp.Basic, true_block: Union['Block', 'SympyAssignment'], + false_block: Optional['Block'] = None) -> None: super(Conditional, self).__init__(parent=None) assert condition_expr.is_Boolean or condition_expr.is_Relational @@ -379,7 +380,7 @@ class LoopOverCoordinate(Node): return None if symbol.dtype != create_type('int'): return None - coordinate = int(symbol.name[len(prefix)+1:]) + coordinate = int(symbol.name[len(prefix) + 1:]) return coordinate @staticmethod diff --git a/backends/__init__.py b/backends/__init__.py index 6e03c0383..8c21ed1a8 100644 --- a/backends/__init__.py +++ b/backends/__init__.py @@ -1,7 +1,14 @@ from .cbackend import generate_c +__all__ = ['generate_c'] try: - from .dot import print_dot - from .llvm import generate_llvm + from .dot import print_dot # NOQA + __all__.append('print_dot') +except ImportError: + pass + +try: + from .llvm import generate_llvm # NOQA + __all__.append('generate_llvm') except ImportError: pass diff --git a/backends/cbackend.py b/backends/cbackend.py index 998d86d54..6ada8ef7f 100644 --- a/backends/cbackend.py +++ b/backends/cbackend.py @@ -13,7 +13,7 @@ from pystencils.astnodes import Node, ResolvedFieldAccess, SympyAssignment from pystencils.data_types import create_type, PointerType, get_type_of_expression, VectorType, cast_func from pystencils.backends.simd_instruction_sets import selected_instruction_set -__all__ = ['generate_c', 'CustomCppCode', 'PrintNode', 'get_headers'] +__all__ = ['generate_c', 'CustomCppCode', 'PrintNode', 'get_headers', 'CustomSympyPrinter'] def generate_c(ast_node: Node, signature_only: bool = False, use_float_constants: Optional[bool] = None) -> str: @@ -161,7 +161,8 @@ class CBackend: def _print_SympyAssignment(self, node): if node.is_declaration: data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " " - return "%s %s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs), self.sympy_printer.doprint(node.rhs)) + return "%s %s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs), + self.sympy_printer.doprint(node.rhs)) else: lhs_type = get_type_of_expression(node.lhs) if type(lhs_type) is VectorType and node.lhs.func == cast_func: diff --git a/backends/dot.py b/backends/dot.py index dccc6ac5d..d477da0a4 100644 --- a/backends/dot.py +++ b/backends/dot.py @@ -104,4 +104,3 @@ def print_dot(node, view=False, short=False, full=False, **kwargs): if view: return graphviz.Source(dot) return dot - diff --git a/backends/simd_instruction_sets.py b/backends/simd_instruction_sets.py index 118872995..d69b21dea 100644 --- a/backends/simd_instruction_sets.py +++ b/backends/simd_instruction_sets.py @@ -20,7 +20,7 @@ def x86_vector_instruction_set(data_type='double', instruction_set='avx'): 'sqrt': 'sqrt[0]', - 'makeVec': 'set[0,0,0,0]', + 'makeVec': 'set[0,0,0,0]', 'makeZero': 'setzero[]', 'loadU': 'loadu[0]', diff --git a/boundaries/__init__.py b/boundaries/__init__.py index 0192bbc2d..afce6f99c 100644 --- a/boundaries/__init__.py +++ b/boundaries/__init__.py @@ -1,3 +1,5 @@ from pystencils.boundaries.boundaryhandling import BoundaryHandling from pystencils.boundaries.boundaryconditions import Neumann from pystencils.boundaries.inkernel import add_neumann_boundary + +__all__ = ['BoundaryHandling', 'Neumann', 'add_neumann_boundary'] diff --git a/boundaries/boundaryhandling.py b/boundaries/boundaryhandling.py index 7a276f28e..2f4b4c82b 100644 --- a/boundaries/boundaryhandling.py +++ b/boundaries/boundaryhandling.py @@ -20,7 +20,7 @@ class FlagInterface: # Add flag field to data handling if it does not yet exist if data_handling.has_data(self.flag_field_name): raise ValueError("There is already a boundary handling registered at the data handling." - "If you want to add multiple handlings, choose a different name.") + "If you want to add multiple handling objects, choose a different name.") data_handling.add_array(self.flag_field_name, dtype=self.FLAG_DTYPE, cpu=True, gpu=False) ff_ghost_layers = data_handling.ghost_layers_of_field(self.flag_field_name) @@ -47,7 +47,8 @@ class BoundaryHandling: self._boundary_object_to_boundary_info = {} self.stencil = stencil self._dirty = True - self.flag_interface = flag_interface if flag_interface is not None else FlagInterface(data_handling, name + "Flags") + fi = flag_interface + self.flag_interface = fi if fi is not None else FlagInterface(data_handling, name + "Flags") gpu = self._target == 'gpu' data_handling.add_custom_class(self._index_array_name, self.IndexFieldBlockData, cpu=True, gpu=gpu) @@ -121,7 +122,8 @@ class BoundaryHandling: else: flag = self._add_boundary(boundary_obj) - for b in self._data_handling.iterate(slice_obj, ghost_layers=ghost_layers, inner_ghost_layers=inner_ghost_layers): + for b in self._data_handling.iterate(slice_obj, ghost_layers=ghost_layers, + inner_ghost_layers=inner_ghost_layers): flag_arr = b[self.flag_interface.flag_field_name] if mask_callback is not None: mask = mask_callback(*b.midpoint_arrays) @@ -206,10 +208,10 @@ class BoundaryHandling: def _add_boundary(self, boundary_obj, flag=None): if boundary_obj not in self._boundary_object_to_boundary_info: - symbolic_index_field = Field.create_generic('indexField', spatial_dimensions=1, - dtype=numpy_data_type_for_boundary_object(boundary_obj, self.dim)) + sym_index_field = Field.create_generic('indexField', spatial_dimensions=1, + dtype=numpy_data_type_for_boundary_object(boundary_obj, self.dim)) ast = self._create_boundary_kernel(self._data_handling.fields[self._field_name], - symbolic_index_field, boundary_obj) + sym_index_field, boundary_obj) if flag is None: flag = self.flag_interface.allocate_next_flag() boundary_info = self.BoundaryInfo(boundary_obj, flag=flag, kernel=ast.compile()) @@ -253,7 +255,7 @@ class BoundaryHandling: self.kernel = kernel class IndexFieldBlockData: - def __init__(self, *args, **kwargs): + def __init__(self, *_1, **_2): self.boundary_object_to_index_list = {} self.boundary_objectToDataSetter = {} diff --git a/boundaries/createindexlist.py b/boundaries/createindexlist.py index ed417c07a..4eecddc47 100644 --- a/boundaries/createindexlist.py +++ b/boundaries/createindexlist.py @@ -3,7 +3,7 @@ import itertools import warnings try: - import pyximport; + import pyximport pyximport.install() from pystencils.boundaries.createindexlistcython import create_boundary_index_list_2d, create_boundary_index_list_3d @@ -31,7 +31,7 @@ def _create_boundary_index_list_python(flag_field_arr, nr_of_ghost_layers, bound result = [] gl = nr_of_ghost_layers - for cell in itertools.product(*reversed([range(gl, i-gl) for i in flag_field_arr.shape])): + for cell in itertools.product(*reversed([range(gl, i - gl) for i in flag_field_arr.shape])): cell = cell[::-1] if not flag_field_arr[cell] & fluid_mask: continue diff --git a/cpu/__init__.py b/cpu/__init__.py index 0e592f13b..900a32eab 100644 --- a/cpu/__init__.py +++ b/cpu/__init__.py @@ -1,3 +1,4 @@ from pystencils.cpu.kernelcreation import create_kernel, create_indexed_kernel, add_openmp from pystencils.cpu.cpujit import make_python_function -from pystencils.backends.cbackend import generate_c + +__all__ = ['create_kernel', 'create_indexed_kernel', 'add_openmp', 'make_python_function'] diff --git a/cpu/cpujit.py b/cpu/cpujit.py index fcfcc816f..7f32f2b5e 100644 --- a/cpu/cpujit.py +++ b/cpu/cpujit.py @@ -247,7 +247,7 @@ def compile_object_cache_to_shared_library(): try: if compiler_config['os'] == 'windows': all_object_files = glob.glob(os.path.join(cache_config['object_cache'], '*.obj')) - link_cmd = ['link.exe', '/DLL', '/out:' + shared_library] + link_cmd = ['link.exe', '/DLL', '/out:' + shared_library] else: all_object_files = glob.glob(os.path.join(cache_config['object_cache'], '*.o')) link_cmd = [compiler_config['command'], '-shared', '-o', shared_library] @@ -318,7 +318,7 @@ def compile_windows(ast, code_hash_str, src_file, lib_file): # Compilation if not os.path.exists(object_file): generate_code(ast, compiler_config['restrict_qualifier'], - '__declspec(dllexport)', src_file) + '__declspec(dllexport)', src_file) # /c compiles only, /EHsc turns of exception handling in c code compile_cmd = ['cl.exe', '/c', '/EHsc'] + compiler_config['flags'].split() diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py index 8a5e4e18c..c2f7b7a64 100644 --- a/cpu/kernelcreation.py +++ b/cpu/kernelcreation.py @@ -2,8 +2,8 @@ import sympy as sp from functools import partial from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction from pystencils.transformations import resolve_buffer_accesses, resolve_field_accesses, make_loop_over_domain, \ - type_all_equations, get_optimal_loop_ordering, parse_base_pointer_info, move_constants_before_loop, split_inner_loop, \ - substitute_array_accesses_with_constants + type_all_equations, get_optimal_loop_ordering, parse_base_pointer_info, move_constants_before_loop, \ + split_inner_loop, substitute_array_accesses_with_constants from pystencils.data_types import TypedSymbol, BasicType, StructType, create_type from pystencils.field import Field, FieldType import pystencils.astnodes as ast @@ -175,7 +175,7 @@ def add_openmp(ast_node, schedule="static", num_threads=True): outer_loops = [l for l in body.atoms(ast.LoopOverCoordinate) if l.is_outermost_loop] assert outer_loops, "No outer loop found" - assert len(outer_loops) <= 1, "More than one outer loop found. Which one should be parallelized?" + assert len(outer_loops) <= 1, "More than one outer loop found. Not clear where to put OpenMP pragma." loop_to_parallelize = outer_loops[0] try: loop_range = int(loop_to_parallelize.stop - loop_to_parallelize.start) diff --git a/datahandling/datahandling_interface.py b/datahandling/datahandling_interface.py index d48912795..cb01439e8 100644 --- a/datahandling/datahandling_interface.py +++ b/datahandling/datahandling_interface.py @@ -352,7 +352,7 @@ class Block: @property def global_slice(self): """Slice in global coordinates.""" - return tuple(slice(off, off+size) for off, size in zip(self._offset, self.shape)) + return tuple(slice(off, off + size) for off, size in zip(self._offset, self.shape)) def __getitem__(self, data_name: str) -> np.ndarray: raise NotImplementedError() diff --git a/datahandling/serial_datahandling.py b/datahandling/serial_datahandling.py index 7e9adeafa..077801719 100644 --- a/datahandling/serial_datahandling.py +++ b/datahandling/serial_datahandling.py @@ -10,7 +10,7 @@ from pystencils.utils import DotDict try: import pycuda.gpuarray as gpuarray - import pycuda.autoinit + import pycuda.autoinit # NOQA except ImportError: gpuarray = None @@ -276,13 +276,12 @@ class SerialDataHandling(DataHandling): from pystencils.slicing import get_periodic_boundary_functor result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls)) else: - from pystencils.gpucuda.periodicity import get_periodic_boundary_functor - result.append(get_periodic_boundary_functor(filtered_stencil, self._domainSize, - index_dimensions=self.fields[name].index_dimensions, - index_dim_shape=self._field_information[name][ - 'values_per_cell'], - dtype=self.fields[name].dtype.numpy_dtype, - ghost_layers=gls)) + from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func + result.append(boundary_func(filtered_stencil, self._domainSize, + index_dimensions=self.fields[name].index_dimensions, + index_dim_shape=self._field_information[name]['values_per_cell'], + dtype=self.fields[name].dtype.numpy_dtype, + ghost_layers=gls)) if target == 'cpu': def result_functor(): diff --git a/derivative.py b/derivative.py index 3135761f7..1610f83e8 100644 --- a/derivative.py +++ b/derivative.py @@ -149,6 +149,7 @@ class DiffOperator(sp.Expr): Multiplications of 'DiffOperator's are interpreted as nested application of differentiation: i.e. DiffOperator('x')*DiffOperator('x') is a second derivative replaced by Diff(Diff(arg, x), t) """ + def handle_mul(mul): args = normalize_product(mul) diffs = [a for a in args if isinstance(a, DiffOperator)] @@ -169,6 +170,7 @@ class DiffOperator(sp.Expr): else: return expr * argument if apply_to_constants else expr + # ---------------------------------------------------------------------------------------------------------------------- @@ -186,6 +188,7 @@ def derivative_terms(expr): else: for a in e.args: visit(a) + visit(expr) return result @@ -261,7 +264,7 @@ def full_diff_expand(expr, functions=None, constants=None): independent_terms *= factor for i in range(len(dependent_terms)): dependent_term = dependent_terms[i] - other_dependent_terms = dependent_terms[:i] + dependent_terms[i+1:] + other_dependent_terms = dependent_terms[:i] + dependent_terms[i + 1:] processed_diff = normalize_diff_order(Diff(dependent_term, **diff_args)) result += independent_terms * prod(other_dependent_terms) * processed_diff return result @@ -278,6 +281,7 @@ def full_diff_expand(expr, functions=None, constants=None): def normalize_diff_order(expression, functions=None, constants=None, sort_key=default_diff_sort_key): """Assumes order of differentiation can be exchanged. Changes the order of nested Diffs to a standard order defined by the sorting key 'sort_key' such that the derivative terms can be further simplified """ + def visit(expr): if isinstance(expr, Diff): nodes = [expr] @@ -425,12 +429,14 @@ def replace_diff(expr, replacement_dict): def zero_diffs(expr, label): """Replaces all differentials with the given target by 0""" + def visit(e): if isinstance(e, Diff): if e.target == label: return 0 new_args = [visit(arg) for arg in e.args] return e.func(*new_args) if new_args else e + return visit(expr) diff --git a/display_utils.py b/display_utils.py index 35da77adb..d4c924bc4 100644 --- a/display_utils.py +++ b/display_utils.py @@ -37,7 +37,7 @@ def show_code(ast: KernelFunction): Can either be displayed as HTML in Jupyter notebooks or printed as normal string. """ - from pystencils.cpu import generate_c + from pystencils.backends.cbackend import generate_c class CodeDisplay: def __init__(self, ast_input): diff --git a/field.py b/field.py index 40a2accb0..085f82c63 100644 --- a/field.py +++ b/field.py @@ -5,8 +5,6 @@ import numpy as np import sympy as sp from sympy.core.cache import cacheit from sympy.tensor import IndexedBase - -from pystencils.assignment import Assignment from pystencils.alignedarray import aligned_empty from pystencils.data_types import TypedSymbol, create_type, create_composite_type_from_string, StructType from pystencils.sympyextensions import is_integer_sequence @@ -69,6 +67,7 @@ class Field(object): >>> jacobi = ( f[-1,0] + f[1,0] + f[0,-1] + f[0,1] ) / 4 Example with index dimensions: LBM D2Q9 stream pull + >>> from pystencils import Assignment >>> stencil = np.array([[0,0], [0,1], [0,-1]]) >>> src = Field.create_generic("src", spatial_dimensions=2, index_dimensions=1) >>> dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1) @@ -366,7 +365,7 @@ class Field(object): __xnew_cached_ = staticmethod(cacheit(__new_stage2__)) def __call__(self, *idx): - if self._index != tuple([0]*self.field.index_dimensions): + if self._index != tuple([0] * self.field.index_dimensions): raise ValueError("Indexing an already indexed Field.Access") idx = tuple(idx) @@ -520,7 +519,7 @@ def layout_string_to_tuple(layout_str, dim): return tuple(reversed(range(dim))) elif layout_str == 'zyxf' or layout_str == 'aos': assert dim <= 4 - return tuple(reversed(range(dim - 1))) + (dim-1,) + return tuple(reversed(range(dim - 1))) + (dim - 1,) elif layout_str == 'f' or layout_str == 'reverse_numpy': return tuple(reversed(range(dim))) elif layout_str == 'c' or layout_str == 'numpy': diff --git a/finitedifferences.py b/finitedifferences.py index db9a28b33..26124f999 100644 --- a/finitedifferences.py +++ b/finitedifferences.py @@ -103,7 +103,7 @@ def discretize_staggered(term, symbols_to_field_dict, coordinate, coordinate_off up, down = __up_down_offsets(d, dim) for i, s in enumerate(symbols): center_grad = (field[up](i) - field[down](i)) / (2 * dx) - neighbor_grad = (field[up+offset](i) - field[down+offset](i)) / (2 * dx) + neighbor_grad = (field[up + offset](i) - field[down + offset](i)) / (2 * dx) substitutions[grad(s)[d]] = (center_grad + neighbor_grad) / 2 return fast_subs(term, substitutions) @@ -170,9 +170,9 @@ class Advection(sp.Function): name_suffix = "_%s" % self.scalar_index if self.scalar_index is not None else "" if isinstance(self.vector, Field): return r"\nabla \cdot(%s %s)" % (printer.doprint(sp.Symbol(self.vector.name)), - printer.doprint(sp.Symbol(self.scalar.name+name_suffix))) + printer.doprint(sp.Symbol(self.scalar.name + name_suffix))) else: - args = [r"\partial_%d(%s %s)" % (i, printer.doprint(sp.Symbol(self.scalar.name+name_suffix)), + args = [r"\partial_%d(%s %s)" % (i, printer.doprint(sp.Symbol(self.scalar.name + name_suffix)), printer.doprint(self.vector[i])) for i in range(self.dim)] return " + ".join(args) @@ -233,7 +233,7 @@ class Diffusion(sp.Function): coeff = self.diffusion_coeff diff_coeff = sp.Symbol(coeff.name) if isinstance(coeff, Field) else coeff return r"div(%s \nabla %s)" % (printer.doprint(diff_coeff), - printer.doprint(sp.Symbol(self.scalar.name+name_suffix))) + printer.doprint(sp.Symbol(self.scalar.name + name_suffix))) # --- Interface for discretization strategy @@ -277,7 +277,7 @@ class Transient(sp.Function): def _latex(self, printer): name_suffix = "_%s" % self.scalar_index if self.scalar_index is not None else "" - return r"\partial_t %s" % (printer.doprint(sp.Symbol(self.scalar.name+name_suffix)),) + return r"\partial_t %s" % (printer.doprint(sp.Symbol(self.scalar.name + name_suffix)),) def transient(scalar, idx=None): @@ -312,7 +312,7 @@ class Discretization2ndOrder: - expr.diffusion_scalar_at_offset(0, 0) * expr.diffusion_coefficient_at_offset(0, 0)) for offset in [-1, 1]] result += first_diffs[1] - first_diffs[0] - return result / (self.dx**2) + return result / (self.dx ** 2) def _discretize_advection(self, expr): result = 0 @@ -352,8 +352,8 @@ class Discretization2ndOrder: else: assert all(i >= 0 for i in indices) offsets = [(1, 1), [-1, 1], [1, -1], [-1, -1]] - result = sum(o1*o2 * fa.neighbor(indices[0], o1).neighbor(indices[1], o2) for o1, o2 in offsets) / 4 - return result / (self.dx**2) + result = sum(o1 * o2 * fa.neighbor(indices[0], o1).neighbor(indices[1], o2) for o1, o2 in offsets) / 4 + return result / (self.dx ** 2) else: raise NotImplementedError("Term contains derivatives of order > 2") @@ -380,4 +380,3 @@ class Discretization2ndOrder: else: print(transient_terms) raise NotImplementedError("Cannot discretize expression with more than one transient term") - diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py index a2786d277..f360794ee 100644 --- a/gpucuda/__init__.py +++ b/gpucuda/__init__.py @@ -1,2 +1,4 @@ from pystencils.gpucuda.kernelcreation import create_cuda_kernel, created_indexed_cuda_kernel from pystencils.gpucuda.cudajit import make_python_function + +__all__ = ['create_cuda_kernel', 'created_indexed_cuda_kernel', 'make_python_function'] diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index 47c8272c7..544adea58 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -19,7 +19,7 @@ def make_python_function(kernel_function_node, argument_dict=None): Returns: compiled kernel as Python function """ - import pycuda.autoinit + import pycuda.autoinit # NOQA from pycuda.compiler import SourceModule if argument_dict is None: @@ -58,7 +58,7 @@ def make_python_function(kernel_function_node, argument_dict=None): cache[key] = (args, block_and_thread_numbers) cache_values.append(kwargs) # keep objects alive such that ids remain unique func(*args, **block_and_thread_numbers) - #cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called + # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called wrapper.ast = kernel_function_node wrapper.parameters = kernel_function_node.parameters return wrapper @@ -143,6 +143,3 @@ def _check_arguments(parameter_specification, argument_dict): return list(index_arr_shapes)[0] else: return list(array_shapes)[0] - - - diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index 738977c0b..1d2a655bf 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -1,8 +1,6 @@ import abc from typing import Tuple - import sympy as sp - from pystencils.astnodes import Conditional, Block from pystencils.slicing import normalize_slice from pystencils.data_types import TypedSymbol, create_type @@ -78,7 +76,7 @@ class BlockIndexing(AbstractIndexing): if AUTO_BLOCK_SIZE_LIMITING: block_size = self.limit_block_size_to_device_maximum(block_size) - + self._blockSize = block_size self._iterationSlice = normalize_slice(iteration_slice, field.spatial_shape) self._dim = field.spatial_dimensions @@ -127,7 +125,8 @@ class BlockIndexing(AbstractIndexing): """ # Get device limits import pycuda.driver as cuda - import pycuda.autoinit + # noinspection PyUnresolvedReferences + import pycuda.autoinit # NOQA da = cuda.device_attribute device = cuda.Context.get_device() @@ -181,7 +180,8 @@ class BlockIndexing(AbstractIndexing): :returns smaller block_size if too many registers are used. """ import pycuda.driver as cuda - import pycuda.autoinit + # noinspection PyUnresolvedReferences + import pycuda.autoinit # NOQA da = cuda.device_attribute if device is None: diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index 773e68b10..a51801136 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -1,8 +1,8 @@ from functools import partial from pystencils.gpucuda.indexing import BlockIndexing -from pystencils.transformations import resolve_field_accesses, type_all_equations, parse_base_pointer_info, get_common_shape, \ - substitute_array_accesses_with_constants, resolve_buffer_accesses +from pystencils.transformations import resolve_field_accesses, type_all_equations, parse_base_pointer_info, \ + get_common_shape, substitute_array_accesses_with_constants, resolve_buffer_accesses from pystencils.astnodes import Block, KernelFunction, SympyAssignment, LoopOverCoordinate from pystencils.data_types import TypedSymbol, BasicType, StructType from pystencils import Field, FieldType @@ -39,7 +39,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde iteration_slice.append(slice(ghost_layers, -ghost_layers if ghost_layers > 0 else None)) else: for i in range(len(common_shape)): - iteration_slice.append(slice(ghost_layers[i][0], -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None)) + iteration_slice.append(slice(ghost_layers[i][0], + -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None)) indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice) @@ -138,5 +139,3 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel ast.indexing = indexing ast.compile = partial(make_python_function, ast) return ast - - diff --git a/gpucuda/periodicity.py b/gpucuda/periodicity.py index 551308865..39e3737ff 100644 --- a/gpucuda/periodicity.py +++ b/gpucuda/periodicity.py @@ -1,4 +1,3 @@ -import sympy as sp import numpy as np from pystencils import Field, Assignment from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices diff --git a/kerncraft_coupling/__init__.py b/kerncraft_coupling/__init__.py index d6dcc635b..33a2ed21f 100644 --- a/kerncraft_coupling/__init__.py +++ b/kerncraft_coupling/__init__.py @@ -1 +1,3 @@ from .kerncraft_interface import PyStencilsKerncraftKernel, KerncraftParameters + +__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters'] diff --git a/kerncraft_coupling/generate_benchmark.py b/kerncraft_coupling/generate_benchmark.py index 3e0341076..7f80f743f 100644 --- a/kerncraft_coupling/generate_benchmark.py +++ b/kerncraft_coupling/generate_benchmark.py @@ -1,5 +1,5 @@ from jinja2 import Template -from pystencils.cpu import generate_c +from pystencils.backends.cbackend import generate_c from pystencils.sympyextensions import prod from pystencils.data_types import get_base_type @@ -30,54 +30,52 @@ int main(int argc, char **argv) {%- endif %} {%- for field_name, dataType, size in fields %} - - // Initialization {{field_name}} + + // Initialization {{field_name}} double * {{field_name}} = aligned_malloc(sizeof({{dataType}}) * {{size}}, 32); for (int i = 0; i < {{size}}; ++i) {{field_name}}[i] = 0.23; - + if(var_false) - dummy({{field_name}}); - + dummy({{field_name}}); + {%- endfor %} - - - + + + {%- for constantName, dataType in constants %} - + // Constant {{constantName}} {{dataType}} {{constantName}}; {{constantName}} = 0.23; if(var_false) dummy(& {{constantName}}); - + {%- endfor %} - + int repeat = atoi(argv[1]); - {%- if likwid %} + {%- if likwid %} likwid_markerStartRegion("loop"); {%- endif %} - + for (; repeat > 0; --repeat) { {{kernelName}}({{callArgumentList}}); - - // Dummy calls + + // Dummy calls {%- for field_name, dataType, size in fields %} - if(var_false) dummy({{field_name}}); + if(var_false) dummy({{field_name}}); {%- endfor %} {%- for constantName, dataType in constants %} if(var_false) dummy(&{{constantName}}); {%- endfor %} } - - {%- if likwid %} + + {%- if likwid %} likwid_markerStopRegion("loop"); {%- endif %} - - - - {%- if likwid %} + + {%- if likwid %} likwid_markerClose(); {%- endif %} } diff --git a/kerncraft_coupling/kerncraft_interface.py b/kerncraft_coupling/kerncraft_interface.py index 241cba467..79d494bfd 100644 --- a/kerncraft_coupling/kerncraft_interface.py +++ b/kerncraft_coupling/kerncraft_interface.py @@ -6,8 +6,6 @@ from collections import defaultdict import subprocess import kerncraft import kerncraft.kernel -from kerncraft.machinemodel import MachineModel -from kerncraft.models import ECM, Benchmark from kerncraft.iaca import iaca_analyse_instrumented_binary, iaca_instrumentation from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess @@ -94,9 +92,9 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): if '-std=c99' not in compiler_args: compiler_args += ['-std=c99'] header_path = kerncraft.get_header_path() - + compiler_cmd = [compiler] + compiler_args + ['-I' + header_path] - + src_file = os.path.join(self.temporary_dir.name, "source.c") asm_file = os.path.join(self.temporary_dir.name, "source.s") iaca_asm_file = os.path.join(self.temporary_dir.name, "source.iaca.s") @@ -109,7 +107,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): f.write(generate_benchmark(self.ast, likwid=False)) # compile to asm files - subprocess.check_output(compiler_cmd + [src_file, '-S', '-o', asm_file]) + subprocess.check_output(compiler_cmd + [src_file, '-S', '-o', asm_file]) subprocess.check_output(compiler_cmd + [dummy_src_file, '-S', '-o', dummy_asm_file]) with open(asm_file) as read, open(iaca_asm_file, 'w') as write: @@ -147,7 +145,8 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): class KerncraftParameters(DotDict): - def __init__(self): + def __init__(self, **kwargs): + super(KerncraftParameters, self).__init__(**kwargs) self['asm_block'] = 'auto' self['asm_increment'] = 0 self['cores'] = 1 @@ -173,4 +172,4 @@ def search_resolved_field_accesses_in_ast(ast): read_accesses = set() write_accesses = set() visit(ast, read_accesses, write_accesses) - return read_accesses, write_accesses \ No newline at end of file + return read_accesses, write_accesses diff --git a/kernelcreation.py b/kernelcreation.py index 25fe50088..75ad8d775 100644 --- a/kernelcreation.py +++ b/kernelcreation.py @@ -1,18 +1,19 @@ +from types import MappingProxyType from pystencils.assignment_collection import AssignmentCollection from pystencils.gpucuda.indexing import indexing_creator_from_params def create_kernel(equations, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None, cpu_openmp=False, cpu_vectorize_info=None, - gpu_indexing='block', gpu_indexing_params={}): + gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): """ Creates abstract syntax tree (AST) of kernel, using a list of update equations. :param equations: either be a plain list of equations or a AssignmentCollection object :param target: 'cpu', 'llvm' or 'gpu' :param data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name to type - :param iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer part of the - field is iterated over + :param iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \ + part of the field is iterated over :param ghost_layers: if left to default, the number of necessary ghost layers is determined automatically a single integer specifies the ghost layer count at all borders, can also be a sequence of pairs [(x_lower_gl, x_upper_gl), .... ] @@ -69,7 +70,7 @@ def create_kernel(equations, target='cpu', data_type="double", iteration_slice=N def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'), - cpu_openmp=True, gpu_indexing='block', gpu_indexing_params={}): + cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})): """ Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. @@ -97,8 +98,9 @@ def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="do raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend") elif target == 'gpu': from pystencils.gpucuda import created_indexed_cuda_kernel - ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type, coordinate_names=coordinate_names, - indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params)) + idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params) + ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type, + coordinate_names=coordinate_names, indexing_creator=idx_creator) return ast else: raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) diff --git a/llvm/__init__.py b/llvm/__init__.py index 2bbac481b..506a99921 100644 --- a/llvm/__init__.py +++ b/llvm/__init__.py @@ -1,3 +1,4 @@ -from .kernelcreation import create_kernel, create_indexed_kernel -from .llvmjit import compile_llvm, generate_and_jit, Jit, make_python_function -from .llvm import generate_llvm +from .kernelcreation import create_kernel +from .llvmjit import make_python_function + +__all__ = ['create_kernel', 'make_python_function'] diff --git a/llvm/llvm.py b/llvm/llvm.py index 8f0a8b067..b6a0f5895 100644 --- a/llvm/llvm.py +++ b/llvm/llvm.py @@ -1,3 +1,4 @@ +import sympy as sp import functools from sympy import S, Indexed from sympy.printing.printer import Printer @@ -266,7 +267,7 @@ class LLVMPrinter(Printer): phi_data = [] after_block = self.builder.append_basic_block() for (expr, condition) in piece.args: - if condition == True: # Don't use 'is' use '=='! + if condition == sp.sympify(True): # Don't use 'is' use '=='! phi_data.append((self._print(expr), self.builder.block)) self.builder.branch(after_block) self.builder.position_at_end(after_block) diff --git a/llvm/llvmjit.py b/llvm/llvmjit.py index 571a31b60..c0e1238b8 100644 --- a/llvm/llvmjit.py +++ b/llvm/llvmjit.py @@ -2,9 +2,6 @@ import llvmlite.ir as ir import llvmlite.binding as llvm import numpy as np import ctypes as ct -import subprocess -import shutil - from pystencils.data_types import create_composite_type_from_string from ..data_types import to_ctypes, ctypes_from_llvm from .llvm import generate_llvm diff --git a/runhelper/__init__.py b/runhelper/__init__.py index 9a854cea6..7bb06cc5d 100644 --- a/runhelper/__init__.py +++ b/runhelper/__init__.py @@ -1,2 +1,4 @@ from pystencils.runhelper.db import Database from pystencils.runhelper.parameterstudy import ParameterStudy + +__all__ = ['Database', 'ParameterStudy'] diff --git a/slicing.py b/slicing.py index 2ebd985ff..15212c1f5 100644 --- a/slicing.py +++ b/slicing.py @@ -231,7 +231,7 @@ def slice_intersection(slice1, slice2): slice2 = [s if not isinstance(s, int) else slice(s, s + 1, None) for s in slice2] new_min = [max(s1.start, s2.start) for s1, s2 in zip(slice1, slice2)] - new_max = [min(s1.stop, s2.stop) for s1, s2 in zip(slice1, slice2)] + new_max = [min(s1.stop, s2.stop) for s1, s2 in zip(slice1, slice2)] if any(max_p - min_p < 0 for min_p, max_p in zip(new_min, new_max)): return None diff --git a/sympyextensions.py b/sympyextensions.py index fc936272f..55859f0f5 100644 --- a/sympyextensions.py +++ b/sympyextensions.py @@ -506,24 +506,23 @@ def sort_assignments_topologically(assignments: Sequence[Assignment]) -> List[As def assignments_from_python_function(func, **kwargs): - """ - Mechanism to simplify the generation of a list of sympy equations. + """Mechanism to simplify the generation of a list of sympy equations. + Introduces a special "assignment operator" written as "@=". Each line containing this operator gives an equation in the result list. Note that executing this function normally yields an error. - + Additionally the shortcut object 'S' is available to quickly create new sympy symbols. - - Example: - - >>> def my_kernel(s): - ... from pystencils import Field - ... f = Field.create_generic('f', spatial_dimensions=2, index_dimensions=0) - ... g = f.new_field_with_different_name('g') - ... - ... s.neighbors @= f[0,1] + f[1,0] - ... g[0,0] @= s.neighbors + f[0,0] - >>> assignments_from_python_function(my_kernel) - [Assignment(neighbors, f_E + f_N), Assignment(g_C, f_C + neighbors)] + + Examples: + >>> def my_kernel(s): + ... from pystencils import Field + ... f = Field.create_generic('f', spatial_dimensions=2, index_dimensions=0) + ... g = f.new_field_with_different_name('g') + ... + ... s.neighbors @= f[0,1] + f[1,0] + ... g[0,0] @= s.neighbors + f[0,0] + >>> assignments_from_python_function(my_kernel) + [Assignment(neighbors, f_E + f_N), Assignment(g_C, f_C + neighbors)] """ import inspect import re diff --git a/test_simplification_strategy.py b/test_simplification_strategy.py index 9c15551dd..8087c65b8 100644 --- a/test_simplification_strategy.py +++ b/test_simplification_strategy.py @@ -10,9 +10,9 @@ def test_simplification_strategy(): a0, a1, a2, a3 = sp.symbols("a_:4") subexpressions = [ - Assignment(s0, 2*a + 2*b), - Assignment(s1, 2 * a + 2 * b + 2*c), - Assignment(s2, 2 * a + 2 * b + 2*c + 2*d), + Assignment(s0, 2 * a + 2 * b), + Assignment(s1, 2 * a + 2 * b + 2 * c), + Assignment(s2, 2 * a + 2 * b + 2 * c + 2 * d), ] main = [ Assignment(a0, s0 + s1), diff --git a/timeloop.py b/timeloop.py index cc7a2fe54..9a7229933 100644 --- a/timeloop.py +++ b/timeloop.py @@ -85,7 +85,3 @@ class TimeLoop: for f in self._timeStepFunctions: f() self.time_steps_run += 1 - - - - diff --git a/transformations/transformations.py b/transformations.py similarity index 89% rename from transformations/transformations.py rename to transformations.py index 9e0fb3f58..831e4af89 100644 --- a/transformations/transformations.py +++ b/transformations.py @@ -1,13 +1,13 @@ from collections import defaultdict, OrderedDict from copy import deepcopy - +from types import MappingProxyType import sympy as sp from sympy.logic.boolalg import Boolean from sympy.tensor import IndexedBase - from pystencils.assignment import Assignment from pystencils.field import Field, FieldType, offset_component_to_direction_string -from pystencils.data_types import TypedSymbol, create_type, PointerType, StructType, get_base_type, cast_func +from pystencils.data_types import TypedSymbol, PointerType, StructType, get_base_type, cast_func, \ + pointer_arithmetic_func, get_type_of_expression, collate_types from pystencils.slicing import normalize_slice import pystencils.astnodes as ast @@ -232,9 +232,9 @@ def parse_base_pointer_info(base_pointer_specification, loop_order, field): def substitute_array_accesses_with_constants(ast_node): """Substitutes all instances of Indexed (array accesses) that are not field accesses with constants. - Benchmarks showed that using an array access as loop bound or in pointer computations cause some compilers to do - less optimizations. - This transformation should be after field accesses have been resolved (since they introduce array accesses) and + Benchmarks showed that using an array access as loop bound or in pointer computations cause some compilers to do + less optimizations. + This transformation should be after field accesses have been resolved (since they introduce array accesses) and before constants are moved before the loops. """ @@ -331,7 +331,8 @@ def resolve_buffer_accesses(ast_node, base_buffer_index, read_only_field_names=s def resolve_field_accesses(ast_node, read_only_field_names=set(), - field_to_base_pointer_info={}, field_to_fixed_coordinates={}): + field_to_base_pointer_info=MappingProxyType({}), + field_to_fixed_coordinates=MappingProxyType({})): """ Substitutes :class:`pystencils.field.Field.Access` nodes by array indexing @@ -632,8 +633,9 @@ def simplify_boolean_expression(expr, single_variable_ranges): return visit(expr) -def simplify_conditionals(node, loop_conditionals={}): +def simplify_conditionals(node, loop_conditionals=MappingProxyType({})): """Simplifies/Removes conditions inside loops that depend on the loop counter.""" + loop_conditionals = loop_conditionals.copy() if isinstance(node, ast.LoopOverCoordinate): ctr_sym = node.loop_counter_symbol loop_conditionals[ctr_sym] = sp.And(ctr_sym >= node.start, ctr_sym < node.stop) @@ -684,8 +686,8 @@ def type_all_equations(eqs, type_for_symbol): :param eqs: list of equations :param type_for_symbol: dict mapping symbol names to types. Types are strings of C types like 'int' or 'double' - :return: ``fields_read, fields_written, typed_equations`` set of read fields, set of written fields, list of equations - where symbols have been replaced by typed symbols + :return: ``fields_read, fields_written, typed_equations`` set of read fields, set of written fields, + list of equations where symbols have been replaced by typed symbols """ if isinstance(type_for_symbol, str) or not hasattr(type_for_symbol, '__getitem__'): type_for_symbol = typing_from_sympy_inspection(eqs, type_for_symbol) @@ -741,6 +743,92 @@ def type_all_equations(eqs, type_for_symbol): return fields_read, fields_written, typed_equations +def insert_casts(node): + """Checks the types and inserts casts and pointer arithmetic where necessary + + :param node: the head node of the ast + :return: modified ast + """ + def cast(zipped_args_types, target_dtype): + """ + Adds casts to the arguments if their type differs from the target type + :param zipped_args_types: a zipped list of args and types + :param target_dtype: The target data type + :return: args with possible casts + """ + casted_args = [] + for argument, data_type in zipped_args_types: + if data_type.numpy_dtype != target_dtype.numpy_dtype: # ignoring const + casted_args.append(cast_func(argument, target_dtype)) + else: + casted_args.append(argument) + return casted_args + + def pointer_arithmetic(expr_args): + """ + Creates a valid pointer arithmetic function + :param expr_args: Arguments of the add expression + :return: pointer_arithmetic_func + """ + pointer = None + new_args = [] + for arg, data_type in expr_args: + if data_type.func is PointerType: + assert pointer is None + pointer = arg + for arg, data_type in expr_args: + if arg != pointer: + assert data_type.is_int() or data_type.is_uint() + new_args.append(arg) + new_args = sp.Add(*new_args) if len(new_args) > 0 else new_args + return pointer_arithmetic_func(pointer, new_args) + + if isinstance(node, sp.AtomicExpr): + return node + args = [] + for arg in node.args: + args.append(insert_casts(arg)) + # TODO indexed, LoopOverCoordinate + if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, sp.Lt, sp.Le, sp.Gt, sp.Ge): + # TODO optimize pow, don't cast integer on double + types = [get_type_of_expression(arg) for arg in args] + assert len(types) > 0 + target = collate_types(types) + zipped = list(zip(args, types)) + if target.func is PointerType: + assert node.func is sp.Add + return pointer_arithmetic(zipped) + else: + return node.func(*cast(zipped, target)) + elif node.func is ast.SympyAssignment: + lhs = args[0] + rhs = args[1] + target = get_type_of_expression(lhs) + if target.func is PointerType: + return node.func(*args) # TODO fix, not complete + else: + return node.func(lhs, *cast([(rhs, get_type_of_expression(rhs))], target)) + elif node.func is ast.ResolvedFieldAccess: + return node + elif node.func is ast.Block: + for old_arg, new_arg in zip(node.args, args): + node.replace(old_arg, new_arg) + return node + elif node.func is ast.LoopOverCoordinate: + for old_arg, new_arg in zip(node.args, args): + node.replace(old_arg, new_arg) + return node + elif node.func is sp.Piecewise: + expressions = [expr for (expr, _) in args] + types = [get_type_of_expression(expr) for expr in expressions] + target = collate_types(types) + zipped = list(zip(expressions, types)) + casted_expressions = cast(zipped, target) + args = [arg.func(*[expr, arg.cond]) for (arg, expr) in zip(args, casted_expressions)] + + return node.func(*args) + + # --------------------------------------- Helper Functions ------------------------------------------------------------- diff --git a/transformations/__init__.py b/transformations/__init__.py deleted file mode 100644 index a8ba7d85a..000000000 --- a/transformations/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .transformations import * -from .stage2 import * diff --git a/transformations/stage2.py b/transformations/stage2.py deleted file mode 100644 index f5664e1bf..000000000 --- a/transformations/stage2.py +++ /dev/null @@ -1,89 +0,0 @@ -import sympy as sp -from pystencils.data_types import PointerType, get_type_of_expression, collate_types, cast_func, pointer_arithmetic_func -import pystencils.astnodes as ast - - -def insert_casts(node): - """ - Checks the types and inserts casts and pointer arithmetic where necessary - :param node: the head node of the ast - :return: modified ast - """ - def cast(zipped_args_types, target_dtype): - """ - Adds casts to the arguments if their type differs from the target type - :param zipped_args_types: a zipped list of args and types - :param target_dtype: The target data type - :return: args with possible casts - """ - casted_args = [] - for argument, data_type in zipped_args_types: - if data_type.numpy_dtype != target_dtype.numpy_dtype: # ignoring const - casted_args.append(cast_func(argument, target_dtype)) - else: - casted_args.append(argument) - return casted_args - - def pointer_arithmetic(expr_args): - """ - Creates a valid pointer arithmetic function - :param expr_args: Arguments of the add expression - :return: pointer_arithmetic_func - """ - pointer = None - new_args = [] - for arg, data_type in expr_args: - if data_type.func is PointerType: - assert pointer is None - pointer = arg - for arg, data_type in expr_args: - if arg != pointer: - assert data_type.is_int() or data_type.is_uint() - new_args.append(arg) - new_args = sp.Add(*new_args) if len(new_args) > 0 else new_args - return pointer_arithmetic_func(pointer, new_args) - - if isinstance(node, sp.AtomicExpr): - return node - args = [] - for arg in node.args: - args.append(insert_casts(arg)) - # TODO indexed, LoopOverCoordinate - if node.func in (sp.Add, sp.Mul, sp.Or, sp.And, sp.Pow, sp.Eq, sp.Ne, sp.Lt, sp.Le, sp.Gt, sp.Ge): - # TODO optimize pow, don't cast integer on double - types = [get_type_of_expression(arg) for arg in args] - assert len(types) > 0 - target = collate_types(types) - zipped = list(zip(args, types)) - if target.func is PointerType: - assert node.func is sp.Add - return pointer_arithmetic(zipped) - else: - return node.func(*cast(zipped, target)) - elif node.func is ast.SympyAssignment: - lhs = args[0] - rhs = args[1] - target = get_type_of_expression(lhs) - if target.func is PointerType: - return node.func(*args) # TODO fix, not complete - else: - return node.func(lhs, *cast([(rhs, get_type_of_expression(rhs))], target)) - elif node.func is ast.ResolvedFieldAccess: - return node - elif node.func is ast.Block: - for old_arg, new_arg in zip(node.args, args): - node.replace(old_arg, new_arg) - return node - elif node.func is ast.LoopOverCoordinate: - for old_arg, new_arg in zip(node.args, args): - node.replace(old_arg, new_arg) - return node - elif node.func is sp.Piecewise: - expressions = [expr for (expr, _) in args] - types = [get_type_of_expression(expr) for expr in expressions] - target = collate_types(types) - zipped = list(zip(expressions, types)) - casted_expressions = cast(zipped, target) - args = [arg.func(*[expr, arg.cond]) for (arg, expr) in zip(args, casted_expressions)] - - return node.func(*args) diff --git a/vectorization.py b/vectorization.py index d8220a36d..f8870c52a 100644 --- a/vectorization.py +++ b/vectorization.py @@ -109,4 +109,3 @@ def insert_vector_casts(ast_node): lhs_type = assignment.lhs.args[1] if type(lhs_type) is VectorType and type(rhs_type) is not VectorType: assignment.rhs = cast_func(assignment.rhs, lhs_type) - -- GitLab