Commit 4b2bd4d3 authored by Markus Holzer

Fixed second stage pipeline

parent 1597456b
@@ -11,11 +11,11 @@ Creating kernels
.. autoclass:: pystencils.CreateKernelConfig
:members:
-.. autofunction:: pystencils.create_domain_kernel
+.. autofunction:: pystencils.kernelcreation.create_domain_kernel
-.. autofunction:: pystencils.create_indexed_kernel
+.. autofunction:: pystencils.kernelcreation.create_indexed_kernel
-.. autofunction:: pystencils.create_staggered_kernel
+.. autofunction:: pystencils.kernelcreation.create_staggered_kernel
Code printing
......
@@ -495,8 +495,8 @@ class CustomSympyPrinter(CCodePrinter):
known = self.known_functions[arg.__class__.__name__.lower()]
code = self._print(arg)
return code.replace(known, f"{known}f")
-elif isinstance(arg, sp.Pow) and data_type == BasicType('float32'):
-known = ['sqrt', 'cbrt', 'pow']
+elif isinstance(arg, (sp.Pow, sp.exp)) and data_type == BasicType('float32'):
+known = ['sqrt', 'cbrt', 'pow', 'exp']
code = self._print(arg)
for k in known:
if k in code:
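With `sp.exp` added to the float32 special cases above, single-precision kernels should now emit `expf` instead of `exp`. A minimal sketch of the expected effect, modelled on the reworked `test_sympy_optimizations` further down in this diff (the field names and the explicit `default_number_float` are illustrative):

```python
import sympy as sp
import pystencils as ps

x, y = ps.fields('x, y: float32[2d]')
# float32 fields plus a float32 default for numeric literals, as in the updated test
config = ps.CreateKernelConfig(default_number_float='float32')
ast = ps.create_kernel([ps.Assignment(x.center, sp.exp(y.center))], config=config)
code = ps.get_code_str(ast)
assert 'expf(' in code  # the single-precision variant should be printed now
```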
@@ -673,8 +673,11 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
return instruction.format(f"& {self._print(arg)}", **self._kwargs)
elif expr.func == DivFunc:
-return self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend),
-**self._kwargs)
+result = self._scalarFallback('_print_Function', expr)
+if not result:
+result = self.instruction_set['/'].format(self._print(expr.divisor), self._print(expr.dividend),
+**self._kwargs)
+return result
elif expr.func == fast_division:
result = self._scalarFallback('_print_Function', expr)
if not result:
......
-import graphviz
-from graphviz import Digraph, lang
+try:
+from graphviz import Digraph
+import graphviz.quoting as quote
+except ImportError:
+from graphviz import Digraph
+import graphviz.lang as quote
from sympy.printing.printer import Printer
@@ -12,7 +17,7 @@ class DotPrinter(Printer):
super(DotPrinter, self).__init__()
self._node_to_str_function = node_to_str_function
self.dot = Digraph(**kwargs)
-self.dot.quote_edge = lang.quote
+self.dot.quote_edge = quote.quote
def _print_KernelFunction(self, func):
self.dot.node(str(id(func)), style='filled', fillcolor='#a056db', label=self._node_to_str_function(func))
......
@@ -75,23 +75,25 @@ class CreateKernelConfig:
"""
gpu_indexing: str = 'block'
"""
-Either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
+Either 'block' or 'line' , or custom indexing class, see `pystencils.gpucuda.AbstractIndexing`
"""
gpu_indexing_params: MappingProxyType = field(default=MappingProxyType({}))
"""
Dict with indexing parameters (constructor parameters of indexing class)
e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'.
"""
# TODO rework this docstring
default_assignment_simplifications: bool = False
"""
If `True` default simplifications are first performed on the Assignments. If problems occur during the
-simplification a warning will be thrown.
-Furthermore, it is essential to know that this is a two-stage process. The first stage of the process acts
-on the level of the `AssignmentCollection`. In this part, `create_simplification_strategy`
-from pystencils.simplificationfactory will be used to apply optimisations like insertion of constants to
-remove pressure from the registers. Thus the first part of the optimisations can only be executed if
-an `AssignmentCollection` is passed. The second part of the optimisation acts on the level of each Assignment
-individually. In this stage, all optimisations from `sympy.codegen.rewriting.optims_c99` are applied
+simplification a warning will be thrown.
+Furthermore, it is essential to know that this is a two-stage process. The first stage of the process acts
+on the level of the `pystencils.AssignmentCollection`. In this part,
+`pystencil.simp.create_simplification_strategy` from pystencils.simplificationfactory will be used to
+apply optimisations like insertion of constants to
+remove pressure from the registers. Thus the first part of the optimisations can only be executed if
+an `AssignmentCollection` is passed. The second part of the optimisation acts on the level of each Assignment
+individually. In this stage, all optimisations from `sympy.codegen.rewriting.optims_c99` are applied
to each Assignment. Thus this stage can also be applied if a list of Assignments is passed.
"""
cpu_prepend_optimizations: List[Callable] = field(default_factory=list)
......
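For orientation, the options documented in this dataclass combine roughly as follows; this is a hedged sketch only (the block size is the example value from the docstring, everything else is illustrative):

```python
import pystencils as ps

config = ps.CreateKernelConfig(
    target=ps.Target.GPU,                               # building the kernel then requires pycuda
    gpu_indexing='block',                               # or 'line', or a custom AbstractIndexing subclass
    gpu_indexing_params={'block_size': (20, 20, 10)},   # constructor parameters of the indexing class
    default_assignment_simplifications=True,            # two-stage simplification described above
)
# ast = ps.create_kernel(assignments, config=config)
```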
@@ -10,7 +10,12 @@ from pystencils.kernel_wrapper import KernelWrapper
def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
"""Show a sympy or pystencils AST as dot graph"""
from pystencils.astnodes import Node
-import graphviz
+try:
+import graphviz
+except ImportError:
+print("graphviz is not installed. Visualizing the AST is not available")
+return
graph_style = {} if graph_style is None else graph_style
if isinstance(expr, Node):
......
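A short usage sketch of `to_dot` under the new optional-dependency behaviour (assuming the function is still re-exported at package level; otherwise import it from `pystencils.display_utils`):

```python
import sympy as sp
import pystencils as ps

x, y = sp.symbols('x y')
dot_graph = ps.to_dot(sp.exp(x) + y, short=True)
# With graphviz installed this yields a renderable graphviz object;
# without it, the message above is printed and None is returned.
```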
@@ -216,7 +216,8 @@ class TypeAdder:
else:
new_args.append(a)
return expr.func(*new_args) if new_args else expr, collated_type
-elif isinstance(expr, (sp.Pow, InverseTrigonometricFunction, TrigonometricFunction, HyperbolicFunction)):
+elif isinstance(expr, (sp.Pow, sp.exp, InverseTrigonometricFunction, TrigonometricFunction,
+HyperbolicFunction)):
args_types = [self.figure_out_type(arg) for arg in expr.args]
collated_type = collate_types([t for _, t in args_types])
new_args = [a if t.dtype_eq(collated_type) else CastFunc(a, collated_type) for a, t in args_types]
......
@@ -33,7 +33,8 @@ def test_two_arguments(dtype, func, target):
dh.run_kernel(kernel)
dh.all_to_cpu()
-np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()))
+np.testing.assert_allclose(dh.gather_array("x")[0, 0], float(func(1.0, 2.0).evalf()),
+13 if dtype == 'float64' else 5)
@pytest.mark.parametrize('dtype', ["float64", "float32"])
......
@@ -4,6 +4,7 @@ import pytest
import pystencils.config
import sympy as sp
import pystencils as ps
+import numpy as np
from pystencils.simp import subexpression_substitution_in_main_assignments
from pystencils.simp import add_subexpressions_for_divisions
@@ -143,29 +144,27 @@ def test_add_subexpressions_for_field_reads():
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
-@pytest.mark.parametrize('simplification', (True, False))
+@pytest.mark.parametrize('dtype', ('float32', 'float64'))
-@pytest.mark.skipif((vs.major, vs.minor, vs.micro) == (3, 8, 2), reason="does not work on python 3.8.2 for some reason")
-def test_sympy_optimizations(target, simplification):
+def test_sympy_optimizations(target, dtype):
if target == ps.Target.GPU:
pytest.importorskip("pycuda")
-src, dst = ps.fields('src, dst: float32[2d]')
+src, dst = ps.fields(f'src, dst: {dtype}[2d]')
-# Triggers Sympy's expm1 optimization
+# Sympy's expm1 optimization is tedious to use and the behaviour is highly depended on the sympy version. In
+# some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
+# for sympy to work properly ...
assignments = ps.AssignmentCollection({
src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)
})
-config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
+config = pystencils.config.CreateKernelConfig(target=target, default_number_float=dtype)
ast = ps.create_kernel(assignments, config=config)
ps.show_code(ast)
code = ps.get_code_str(ast)
-if simplification:
-assert 'expm1(' in code
-else:
-assert 'expm1(' not in code
+if dtype == 'float32':
+assert 'expf(' in code
+elif dtype == 'float64':
+assert 'exp(' in code
@pytest.mark.parametrize('target', (ps.Target.CPU, ps.Target.GPU))
@@ -176,7 +175,7 @@ def test_evaluate_constant_terms(target, simplification):
pytest.importorskip("pycuda")
src, dst = ps.fields('src, dst: float32[2d]')
-# Triggers Sympy's cos optimization
+# cos of a number will always be simplified
assignments = ps.AssignmentCollection({
src[0, 0]: -sp.cos(1) + dst[0, 0]
})
@@ -184,8 +183,4 @@ def test_evaluate_constant_terms(target, simplification):
config = pystencils.config.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
ast = ps.create_kernel(assignments, config=config)
code = ps.get_code_str(ast)
-if simplification:
-assert 'cos(' not in code
-else:
-assert 'cos(' in code
+print(code)
+assert 'cos(' not in code
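A standalone illustration of the behaviour the simplified assertion relies on, mirroring the CPU case of this test (field names taken from it):

```python
import sympy as sp
import pystencils as ps

src, dst = ps.fields('src, dst: float32[2d]')
ast = ps.create_kernel([ps.Assignment(src[0, 0], -sp.cos(1) + dst[0, 0])])
code = ps.get_code_str(ast)
assert 'cos(' not in code  # cos(1) is evaluated to a numeric literal during code generation
```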
@@ -195,9 +195,9 @@ def test_piecewise3(instruction_set=instruction_set):
g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0
ast = ps.create_kernel(test_kernel)
-ps.show_code(ast)
+# ps.show_code(ast)
vectorize(ast, instruction_set=instruction_set)
-ps.show_code(ast)
+# ps.show_code(ast)
ast.compile()
......
@@ -61,24 +61,29 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
def test_strided(instruction_set, dtype):
-f, g = ps.fields(f"f, g : float{64 if dtype == 'double' else 32}[2D]")
+npdtype = np.float64 if dtype == 'double' else np.float32
+f, g = ps.fields(f"f, g : float{64 if dtype=='double' else 32}[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
-if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and not instruction_set in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
+if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn:
-config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+default_number_float=npdtype)
ast = ps.create_kernel(update_rule, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0]
else:
with pytest.warns(None) as warn:
-config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
+config = pystencils.config.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set},
+default_number_float=npdtype)
ast = ps.create_kernel(update_rule, config=config)
assert len(warn) == 0
# ps.show_code(ast)
func = ast.compile()
ref_func = ps.create_kernel(update_rule).compile()
-arr = np.random.random((23 + 2, 17 + 2)).astype(np.float64 if dtype == 'double' else np.float32)
-dst = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)
-ref = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)
+arr = np.random.random((23 + 2, 17 + 2)).astype(npdtype)
+dst = np.zeros_like(arr, dtype=npdtype)
+ref = np.zeros_like(arr, dtype=npdtype)
func(g=dst, f=arr)
ref_func(g=ref, f=arr)
......
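The newly passed `default_number_float` ties the plain literal in the update rule (the `42.0`) to the field precision instead of letting it default to double. A reduced sketch of that interaction, without the vectorization setup of the test (which needs a concrete instruction set):

```python
import numpy as np
import pystencils as ps

f, g = ps.fields('f, g: float32[2D]')
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + 42.0)]
config = ps.CreateKernelConfig(default_number_float=np.float32)
ast = ps.create_kernel(update_rule, config=config)
# In the test above this is combined with cpu_vectorize_info={'instruction_set': ...}
# so that no double-precision constant ends up in the single-precision vector kernel.
```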