Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 1243 additions and 151 deletions
/*
Copyright 2021, Michael Kuron.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <altivec.h>
#undef vector
#undef bool
inline void cachelineZero(void * p) {
#ifdef __xlC__
__dcbz(p);
#else
__asm__ volatile("dcbz 0, %0"::"r"(p):"memory");
#endif
}
inline size_t _cachelineSize() {
// allocate and fill with ones
const size_t max_size = 0x100000;
uint8_t data[2*max_size];
for (size_t i = 0; i < 2*max_size; ++i) {
data[i] = 0xff;
}
// find alignment offset
size_t offset = max_size - ((uintptr_t) data) % max_size;
// zero a cacheline
cachelineZero((void*) (data + offset));
// make sure that at least one byte was zeroed
if (data[offset] != 0) {
return SIZE_MAX;
}
// make sure that nothing was zeroed before the pointer
if (data[offset-1] == 0) {
return SIZE_MAX;
}
// find the last byte that was zeroed
for (size_t size = 1; size < max_size; ++size) {
if (data[offset + size] != 0) {
return size;
}
}
// too much was zeroed
return SIZE_MAX;
}
inline size_t cachelineSize() {
static size_t size = _cachelineSize();
return size;
}
/*
Copyright 2023, Michael Kuron.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
inline void cachelineZero(void * p) {
#ifdef __riscv_zicboz
__asm__ volatile("cbo.zero (%0)"::"r"(p):"memory");
#endif
}
inline size_t _cachelineSize() {
// allocate and fill with ones
const size_t max_size = 0x100000;
uint8_t data[2*max_size];
for (size_t i = 0; i < 2*max_size; ++i) {
data[i] = 0xff;
}
// find alignment offset
size_t offset = max_size - ((uintptr_t) data) % max_size;
// zero a cacheline
cachelineZero((void*) (data + offset));
// make sure that at least one byte was zeroed
if (data[offset] != 0) {
return SIZE_MAX;
}
// make sure that nothing was zeroed before the pointer
if (data[offset-1] == 0) {
return SIZE_MAX;
}
// find the last byte that was zeroed
for (size_t size = 1; size < max_size; ++size) {
if (data[offset + size] != 0) {
return size;
}
}
// too much was zeroed
return SIZE_MAX;
}
inline size_t cachelineSize() {
#ifdef __riscv_zicboz
static size_t size = _cachelineSize();
return size;
#else
return SIZE_MAX;
#endif
}
# TODO #47 move to a module functions
import numpy as np
import sympy as sp
from pystencils.data_types import cast_func, collate_types, create_type, get_type_of_expression
from pystencils.typing import CastFunc, collate_types, create_type, get_type_of_expression
from pystencils.sympyextensions import is_integer_sequence
......@@ -12,21 +13,29 @@ class IntegerFunctionTwoArgsMixIn(sp.Function):
args = []
for a in (arg1, arg2):
if isinstance(a, sp.Number) or isinstance(a, int):
args.append(cast_func(a, create_type("int")))
args.append(CastFunc(a, create_type("int")))
elif isinstance(a, np.generic):
args.append(cast_func(a, a.dtype))
args.append(CastFunc(a, a.dtype))
else:
args.append(a)
for a in args:
try:
type = get_type_of_expression(a)
if not type.is_int():
raise ValueError("Argument to integer function is not an int but " + str(type))
dtype = get_type_of_expression(a)
if not dtype.is_int():
raise ValueError("Argument to integer function is not an int but " + str(dtype))
except NotImplementedError:
raise ValueError("Integer functions can only be constructed with typed expressions")
return super().__new__(cls, *args)
def _eval_evalf(self, *pargs, **kwargs):
arg1 = self.args[0].evalf(*pargs, **kwargs) if hasattr(self.args[0], 'evalf') else self.args[0]
arg2 = self.args[1].evalf(*pargs, **kwargs) if hasattr(self.args[1], 'evalf') else self.args[1]
return self._eval_op(arg1, arg2)
def _eval_op(self, arg1, arg2):
return self
# noinspection PyPep8Naming
class bitwise_xor(IntegerFunctionTwoArgsMixIn):
......@@ -55,7 +64,9 @@ class bitwise_or(IntegerFunctionTwoArgsMixIn):
# noinspection PyPep8Naming
class int_div(IntegerFunctionTwoArgsMixIn):
pass
def _eval_op(self, arg1, arg2):
return int(arg1 // arg2)
# noinspection PyPep8Naming
......
......@@ -4,7 +4,8 @@ import islpy as isl
import sympy as sp
import pystencils.astnodes as ast
from pystencils.transformations import parents_of_type
from pystencils.typing import parents_of_type
from pystencils.backends.cbackend import CustomSympyPrinter
def remove_brackets(s):
......@@ -36,13 +37,12 @@ def isl_iteration_set(node: ast.Node):
loop_start_str = remove_brackets(str(loop.start))
loop_stop_str = remove_brackets(str(loop.stop))
ctr_name = loop.loop_counter_name
set_string_description = "{} >= {} and {} < {}".format(ctr_name, loop_start_str, ctr_name, loop_stop_str)
set_string_description = f"{ctr_name} >= {loop_start_str} and {ctr_name} < {loop_stop_str}"
conditions.append(remove_brackets(set_string_description))
symbol_names = ','.join(degrees_of_freedom)
condition_str = ' and '.join(conditions)
set_description = "{{ [{symbol_names}] : {condition_str} }}".format(symbol_names=symbol_names,
condition_str=condition_str)
set_description = f"{{ [{symbol_names}] : {condition_str} }}"
return degrees_of_freedom, isl.BasicSet(set_description)
......@@ -52,12 +52,13 @@ def simplify_loop_counter_dependent_conditional(conditional):
dofs_in_loops, iteration_set = isl_iteration_set(conditional)
if dofs_in_condition.issubset(dofs_in_loops):
symbol_names = ','.join(dofs_in_loops)
condition_str = remove_brackets(str(conditional.condition_expr))
condition_set = isl.BasicSet("{{ [{symbol_names}] : {condition_str} }}".format(symbol_names=symbol_names,
condition_str=condition_str))
condition_str = CustomSympyPrinter().doprint(conditional.condition_expr)
condition_str = remove_brackets(condition_str)
condition_set = isl.BasicSet(f"{{ [{symbol_names}] : {condition_str} }}")
if condition_set.is_empty():
conditional.replace_by_false_block()
return
intersection = iteration_set.intersect(condition_set)
if intersection.is_empty():
......
......@@ -7,67 +7,6 @@ from IPython.display import HTML
import pystencils.plot as plt
__all__ = ['log_progress', 'make_imshow_animation', 'display_animation', 'set_display_mode']
def log_progress(sequence, every=None, size=None, name='Items'):
"""Copied from https://github.com/alexanderkuk/log-progress"""
from ipywidgets import IntProgress, HTML, VBox
from IPython.display import display
is_iterator = False
if size is None:
try:
size = len(sequence)
except TypeError:
is_iterator = True
if size is not None:
if every is None:
if size <= 200:
every = 1
else:
every = int(size / 200) # every 0.5%
else:
assert every is not None, 'sequence is iterator, set every'
if is_iterator:
progress = IntProgress(min=0, max=1, value=1)
progress.bar_style = 'info'
else:
progress = IntProgress(min=0, max=size, value=0)
label = HTML()
box = VBox(children=[label, progress])
display(box)
index = 0
try:
for index, record in enumerate(sequence, 1):
if index == 1 or index % every == 0:
if is_iterator:
label.value = '{name}: {index} / ?'.format(
name=name,
index=index
)
else:
progress.value = index
label.value = u'{name}: {index} / {size}'.format(
name=name,
index=index,
size=size
)
yield record
except:
progress.bar_style = 'danger'
raise
else:
progress.bar_style = 'success'
progress.value = index
label.value = "{name}: {index}".format(
name=name,
index=str(index or '?')
)
VIDEO_TAG = """<video controls width="80%">
<source src="data:video/x-m4v;base64,{0}" type="video/mp4">
Your browser does not support the video tag.
......
from collections import namedtuple, defaultdict
from typing import Union
import sympy as sp
from sympy.codegen import Assignment
from pystencils.simp import AssignmentCollection
from pystencils import astnodes as ast, TypedSymbol
from pystencils.field import Field
from pystencils.node_collection import NodeCollection
from pystencils.transformations import NestedScopes
# TODO use this in Constraint Checker
accepted_functions = [
sp.Pow,
sp.sqrt,
sp.log,
# TODO trigonometric functions (and whatever tests will fail)
]
class KernelConstraintsCheck:
# TODO: proper specification
# TODO: More checks :)
"""Checks if the input to create_kernel is valid.
Test the following conditions:
- SSA Form for pure symbols:
- Every pure symbol may occur only once as left-hand-side of an assignment
- Every pure symbol that is read, may not be written to later
- Independence / Parallelization condition:
- a field that is written may only be read at exact the same spatial position
(Pure symbols are symbols that are not Field.Accesses)
"""
FieldAndIndex = namedtuple('FieldAndIndex', ['field', 'index'])
def __init__(self, check_independence_condition=True, check_double_write_condition=True):
self.scopes = NestedScopes()
self.field_reads = defaultdict(set)
self.field_writes = defaultdict(set)
self.fields_read = set()
self.check_independence_condition = check_independence_condition
self.check_double_write_condition = check_double_write_condition
def visit(self, obj):
if isinstance(obj, (AssignmentCollection, NodeCollection)):
[self.visit(e) for e in obj.all_assignments]
elif isinstance(obj, list) or isinstance(obj, tuple):
[self.visit(e) for e in obj]
elif isinstance(obj, (sp.Eq, ast.SympyAssignment, Assignment)):
self.process_assignment(obj)
elif isinstance(obj, ast.Conditional):
self.scopes.push()
# Disable double write check inside conditionals
# would be triggered by e.g. in-kernel boundaries
old_double_write = self.check_double_write_condition
old_independence_condition = self.check_independence_condition
self.check_double_write_condition = False
self.check_independence_condition = False
if obj.false_block:
self.visit(obj.false_block)
self.process_expression(obj.condition_expr)
self.process_expression(obj.true_block)
self.check_double_write_condition = old_double_write
self.check_independence_condition = old_independence_condition
self.scopes.pop()
elif isinstance(obj, ast.Block):
self.scopes.push()
[self.visit(e) for e in obj.args]
self.scopes.pop()
elif isinstance(obj, ast.Node) and not isinstance(obj, ast.LoopOverCoordinate):
pass
else:
raise ValueError(f'Invalid object in kernel {type(obj)}')
def process_assignment(self, assignment: Union[sp.Eq, ast.SympyAssignment, Assignment]):
# for checks it is crucial to process rhs before lhs to catch e.g. a = a + 1
self.process_expression(assignment.rhs)
self.process_lhs(assignment.lhs)
def process_expression(self, rhs):
# TODO constraint for accepted functions, see TODO above
self.update_accesses_rhs(rhs)
if isinstance(rhs, Field.Access):
self.fields_read.add(rhs.field)
self.fields_read.update(rhs.indirect_addressing_fields)
else:
for arg in rhs.args:
self.process_expression(arg)
@property
def fields_written(self):
"""
Return all rhs fields
"""
return set(k.field for k, v in self.field_writes.items() if len(v))
def process_lhs(self, lhs: Union[Field.Access, TypedSymbol, sp.Symbol]):
assert isinstance(lhs, sp.Symbol)
self.update_accesses_lhs(lhs)
def update_accesses_lhs(self, lhs):
if isinstance(lhs, Field.Access):
fai = self.FieldAndIndex(lhs.field, lhs.index)
if self.check_double_write_condition and lhs.offsets in self.field_writes[fai]:
raise ValueError(f"Field {lhs.field.name} is written twice at the same location")
self.field_writes[fai].add(lhs.offsets)
if self.check_double_write_condition and len(self.field_writes[fai]) > 1:
raise ValueError(
f"Field {lhs.field.name} is written at two different locations")
if fai in self.field_reads:
reads = tuple(self.field_reads[fai])
if len(reads) > 1 or lhs.offsets != reads[0]:
if self.check_independence_condition:
raise ValueError(f"Field {lhs.field.name} is written at different location than it was read. "
f"This means the resulting kernel would not be thread safe")
elif isinstance(lhs, sp.Symbol):
if self.scopes.is_defined_locally(lhs):
raise ValueError(f"Assignments not in SSA form, multiple assignments to {lhs.name}")
if lhs in self.scopes.free_parameters:
raise ValueError(f"Symbol {lhs.name} is written, after it has been read")
self.scopes.define_symbol(lhs)
def update_accesses_rhs(self, rhs):
if isinstance(rhs, Field.Access) and self.check_independence_condition:
fai = self.FieldAndIndex(rhs.field, rhs.index)
writes = self.field_writes[fai]
self.field_reads[fai].add(rhs.offsets)
for write_offset in writes:
assert len(writes) == 1
if write_offset != rhs.offsets:
raise ValueError(f"Violation of loop independence condition. Field "
f"{rhs.field} is read at {rhs.offsets} and written at {write_offset}")
self.fields_read.add(rhs.field)
elif isinstance(rhs, sp.Symbol):
self.scopes.access_symbol(rhs)
import ast
import inspect
import textwrap
from typing import Callable, Union, List, Dict, Tuple
import sympy as sp
from pystencils.assignment import Assignment
from pystencils.sympyextensions import SymbolCreator
from pystencils.config import CreateKernelConfig
__all__ = ['kernel']
__all__ = ['kernel', 'kernel_config']
def kernel(func, **kwargs):
"""Decorator to simplify generation of pystencils Assignments.
Changes the meaning of the '@=' operator. Each line containing this operator gives a symbolic assignment
in the result list. Furthermore the meaning of the ternary inline 'if-else' changes meaning to denote a
sympy Piecewise.
The decorated function may not receive any arguments, with exception of an argument called 's' that specifies
a SymbolCreator()
Examples:
>>> import pystencils as ps
>>> @kernel
... def my_kernel(s):
... f, g = ps.fields('f, g: [2D]')
... s.neighbors @= f[0,1] + f[1,0]
... g[0,0] @= s.neighbors + f[0,0] if f[0,0] > 0 else 0
>>> f, g = ps.fields('f, g: [2D]')
>>> assert my_kernel[0].rhs == f[0,1] + f[1,0]
def _kernel(func: Callable[..., None], **kwargs) -> Tuple[List[Assignment], str]:
"""
Convenient function for kernel decorator to prevent code duplication
Args:
func: decorated function
**kwargs: kwargs for the function
Returns:
assignments, function_name
"""
source = inspect.getsource(func)
source = textwrap.dedent(source)
......@@ -51,9 +42,76 @@ def kernel(func, **kwargs):
if 's' in args and 's' not in kwargs:
kwargs['s'] = SymbolCreator()
func(**kwargs)
return assignments, func.__name__
def kernel(func: Callable[..., None], **kwargs) -> List[Assignment]:
"""Decorator to simplify generation of pystencils Assignments.
Changes the meaning of the '@=' operator. Each line containing this operator gives a symbolic assignment
in the result list. Furthermore the meaning of the ternary inline 'if-else' changes meaning to denote a
sympy Piecewise.
The decorated function may not receive any arguments, with exception of an argument called 's' that specifies
a SymbolCreator()
Args:
func: decorated function
**kwargs: kwargs for the function
Examples:
>>> import pystencils as ps
>>> @kernel
... def my_kernel(s):
... f, g = ps.fields('f, g: [2D]')
... s.neighbors @= f[0,1] + f[1,0]
... g[0,0] @= s.neighbors + f[0,0] if f[0,0] > 0 else 0
>>> f, g = ps.fields('f, g: [2D]')
>>> assert my_kernel[0].rhs == f[0,1] + f[1,0]
"""
assignments, _ = _kernel(func, **kwargs)
return assignments
def kernel_config(config: CreateKernelConfig, **kwargs) -> Callable[..., Dict]:
"""Decorator to simplify generation of pystencils Assignments, which takes a configuration
and updates the function name accordingly.
Changes the meaning of the '@=' operator. Each line containing this operator gives a symbolic assignment
in the result list. Furthermore, the meaning of the ternary inline 'if-else' changes meaning to denote a
sympy Piecewise.
The decorated function may not receive any arguments, with exception to an argument called 's' that specifies
a SymbolCreator()
Args:
config: Specify whether to return the list with assignments, or a dictionary containing additional settings
like func_name
Returns:
decorator with config
Examples:
>>> import pystencils as ps
>>> kernel_configuration = ps.CreateKernelConfig()
>>> @kernel_config(kernel_configuration)
... def my_kernel(s):
... src, dst = ps.fields('src, dst: [2D]')
... s.neighbors @= src[0, 1] + src[1, 0]
... dst[0, 0] @= s.neighbors + src[0, 0] if src[0, 0] > 0 else 0
>>> f, g = ps.fields('src, dst: [2D]')
>>> assert my_kernel['assignments'][0].rhs == f[0, 1] + f[1, 0]
"""
def decorator(func: Callable[..., None]) -> Union[List[Assignment], Dict]:
"""
Args:
func: decorated function
Returns:
Dict for unpacking into create_kernel
"""
assignments, func_name = _kernel(func, **kwargs)
config.function_name = func_name
return {'assignments': assignments, 'config': config}
return decorator
# noinspection PyMethodMayBeStatic
class KernelFunctionRewrite(ast.NodeTransformer):
......
import functools
import itertools
from types import MappingProxyType
import warnings
from typing import Union, List
import sympy as sp
from pystencils.config import CreateKernelConfig
from pystencils.assignment import Assignment
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.assignment import Assignment, AddAugmentedAssignment
from pystencils.astnodes import Node, Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.vectorization import vectorize
from pystencils.enums import Target, Backend
from pystencils.field import Field, FieldType
from pystencils.gpucuda.indexing import indexing_creator_from_params
from pystencils.node_collection import NodeCollection
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.kernel_contrains_check import KernelConstraintsCheck
from pystencils.simplificationfactory import create_simplification_strategy
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
from pystencils.transformations import (
loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
def create_kernel(assignments,
target='cpu',
data_type="double",
iteration_slice=None,
ghost_layers=None,
skip_independence_check=False,
cpu_openmp=False,
cpu_vectorize_info=None,
cpu_blocking=None,
omp_single_loop=True,
gpu_indexing='block',
gpu_indexing_params=MappingProxyType({}),
use_textures_for_interpolation=True,
cpu_prepend_optimizations=[],
use_auto_for_assignments=False,
opencl_queue=None,
opencl_ctx=None):
def create_kernel(assignments: Union[Assignment, List[Assignment],
AddAugmentedAssignment, List[AddAugmentedAssignment],
AssignmentCollection, List[Node], NodeCollection],
*,
config: CreateKernelConfig = None, **kwargs):
"""
Creates abstract syntax tree (AST) of kernel, using a list of update equations.
This function forms the general API and delegates the kernel creation to others depending on the CreateKernelConfig.
Args:
assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
target: 'cpu', 'llvm', 'gpu' or 'opencl'
data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
to type
iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
part of the field is iterated over
ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
If left to default, the number of ghost layers is determined automatically.
skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
periodicity kernel, that access the field outside the iteration bounds. Use with care!
cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted
cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
for documentation of these parameters see vectorize function. Example:
'{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
cpu_blocking: a tuple of block sizes or None if no blocking should be applied
gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
config: CreateKernelConfig which includes the needed configuration
kwargs: Arguments for updating the config
Returns:
abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
......@@ -67,8 +41,8 @@ def create_kernel(assignments,
>>> import numpy as np
>>> s, d = ps.fields('s, d: [2D]')
>>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
>>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
>>> kernel = ast.compile()
>>> kernel_ast = ps.create_kernel(assignment, config=ps.CreateKernelConfig(cpu_openmp=True))
>>> kernel = kernel_ast.compile()
>>> d_arr = np.zeros([5, 5])
>>> kernel(d=d_arr, s=np.ones([5, 5]))
>>> d_arr
......@@ -78,76 +52,132 @@ def create_kernel(assignments,
[0., 4., 4., 4., 0.],
[0., 0., 0., 0., 0.]])
"""
# ---- Updating configuration from kwargs
if not config:
config = CreateKernelConfig(**kwargs)
else:
for k, v in kwargs.items():
if not hasattr(config, k):
raise KeyError(f'{v} is not a valid kwarg. Please look in CreateKernelConfig for valid settings')
setattr(config, k, v)
# ---- Normalizing parameters
if isinstance(assignments, Assignment):
if isinstance(assignments, (Assignment, AddAugmentedAssignment)):
assignments = [assignments]
assert assignments, "Assignments must not be empty!"
split_groups = ()
if isinstance(assignments, AssignmentCollection):
if 'split_groups' in assignments.simplification_hints:
split_groups = assignments.simplification_hints['split_groups']
assignments = assignments.all_assignments
# ---- Creating ast
if target == 'cpu':
from pystencils.cpu import create_kernel
from pystencils.cpu import add_openmp
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check)
for optimization in cpu_prepend_optimizations:
optimization(ast)
omp_collapse = None
if cpu_blocking:
omp_collapse = loop_blocking(ast, cpu_blocking)
if cpu_openmp:
add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse, assume_single_outer_loop=omp_single_loop)
if cpu_vectorize_info:
if cpu_vectorize_info is True:
vectorize(ast)
elif isinstance(cpu_vectorize_info, dict):
vectorize(ast, **cpu_vectorize_info)
else:
raise ValueError("Invalid value for cpu_vectorize_info")
elif target == 'llvm':
from pystencils.llvm import create_kernel
ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
iteration_slice=iteration_slice, ghost_layers=ghost_layers)
elif target == 'gpu' or target == 'opencl':
from pystencils.gpucuda import create_cuda_kernel
ast = create_cuda_kernel(assignments, type_info=data_type,
indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
iteration_slice=iteration_slice, ghost_layers=ghost_layers,
skip_independence_check=skip_independence_check,
use_textures_for_interpolation=use_textures_for_interpolation)
if target == 'opencl':
from pystencils.opencl.opencljit import make_python_function
ast._backend = 'opencl'
ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
ast._target = 'opencl'
ast._backend = 'opencl'
return ast
if isinstance(assignments, list):
assignments = NodeCollection(assignments)
elif isinstance(assignments, AssignmentCollection):
# TODO Markus check and doku
# --- applying first default simplifications
try:
if config.default_assignment_simplifications:
simplification = create_simplification_strategy()
assignments = simplification(assignments)
except Exception as e:
warnings.warn(f"It was not possible to apply the default pystencils optimisations to the "
f"AssignmentCollection due to the following problem :{e}")
simplification_hints = assignments.simplification_hints
assignments = NodeCollection.from_assignment_collection(assignments)
assignments.simplification_hints = simplification_hints
if config.index_fields:
return create_indexed_kernel(assignments, config=config)
else:
raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
return create_domain_kernel(assignments, config=config)
def create_domain_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
"""
Creates abstract syntax tree (AST) of kernel, using a NodeCollection.
Note that `create_domain_kernel` is a lower level function which shoul be accessed by not providing `index_fields`
to create_kernel
Args:
assignments: `pystencils.node_collection.NodeCollection` containing all assignements and nodes to be processed
config: CreateKernelConfig which includes the needed configuration
if use_auto_for_assignments:
Returns:
abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
can be compiled with through its 'compile()' member
Example:
>>> import pystencils as ps
>>> import numpy as np
>>> from pystencils.kernelcreation import create_domain_kernel
>>> from pystencils.node_collection import NodeCollection
>>> s, d = ps.fields('s, d: [2D]')
>>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
>>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
>>> kernel_ast = create_domain_kernel(NodeCollection([assignment]), config=kernel_config)
>>> kernel = kernel_ast.compile()
>>> d_arr = np.zeros([5, 5])
>>> kernel(d=d_arr, s=np.ones([5, 5]))
>>> d_arr
array([[0., 0., 0., 0., 0.],
[0., 4., 4., 4., 0.],
[0., 4., 4., 4., 0.],
[0., 4., 4., 4., 0.],
[0., 0., 0., 0., 0.]])
"""
# --- eval
assignments.evaluate_terms()
# FUTURE WORK from here we shouldn't NEED sympy
# --- check constrains
check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
check_double_write_condition=not config.allow_double_writes)
check.visit(assignments)
assignments.bound_fields = check.fields_written
assignments.rhs_fields = check.fields_read
# ---- Creating ast
ast = None
if config.target == Target.CPU:
if config.backend == Backend.C:
from pystencils.cpu import add_openmp, create_kernel
ast = create_kernel(assignments, config=config)
for optimization in config.cpu_prepend_optimizations:
optimization(ast)
omp_collapse = None
if config.cpu_blocking:
omp_collapse = loop_blocking(ast, config.cpu_blocking)
if config.cpu_openmp:
add_openmp(ast, num_threads=config.cpu_openmp, collapse=omp_collapse,
assume_single_outer_loop=config.omp_single_loop)
if config.cpu_vectorize_info:
if config.cpu_vectorize_info is True:
vectorize(ast)
elif isinstance(config.cpu_vectorize_info, dict):
vectorize(ast, **config.cpu_vectorize_info)
if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
# This condition is stricter than it needs to be: if blocks along the fastest axis start on a
# cache line boundary, it's okay. But we cannot determine that here.
# We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
raise ValueError("Blocking cannot be combined with cacheline-zeroing")
else:
raise ValueError("Invalid value for cpu_vectorize_info")
elif config.target == Target.GPU:
if config.backend == Backend.CUDA:
from pystencils.gpu import create_cuda_kernel
ast = create_cuda_kernel(assignments, config=config)
if not ast:
raise NotImplementedError(
f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`')
if config.use_auto_for_assignments:
for a in ast.atoms(SympyAssignment):
a.use_auto = True
return ast
def create_indexed_kernel(assignments,
index_fields,
target='cpu',
data_type="double",
coordinate_names=('x', 'y', 'z'),
cpu_openmp=True,
gpu_indexing='block',
gpu_indexing_params=MappingProxyType({}),
use_textures_for_interpolation=True,
opencl_queue=None,
opencl_ctx=None):
def create_indexed_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
"""
Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
......@@ -157,12 +187,22 @@ def create_indexed_kernel(assignments,
'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
example boundary parameters.
index_fields: list of index fields, i.e. 1D fields with struct data type
coordinate_names: name of the coordinate fields in the struct data type
Note that `create_indexed_kernel` is a lower level function which shoul be accessed by providing `index_fields`
to create_kernel
Args:
assignments: `pystencils.node_collection.NodeCollection` containing all assignements and nodes to be processed
config: CreateKernelConfig which includes the needed configuration
Returns:
abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
can be compiled with through its 'compile()' member
Example:
>>> import pystencils as ps
>>> from pystencils.node_collection import NodeCollection
>>> import numpy as np
>>> from pystencils.kernelcreation import create_indexed_kernel
>>>
>>> # Index field stores the indices of the cell to visit together with optional values
>>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
......@@ -171,9 +211,10 @@ def create_indexed_kernel(assignments,
>>>
>>> # Additional values stored in index field can be accessed in the kernel as well
>>> s, d = ps.fields('s, d: [2D]')
>>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
>>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
>>> kernel = ast.compile()
>>> assignment = ps.Assignment(d[0, 0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
>>> kernel_config = ps.CreateKernelConfig(index_fields=[idx_field], coordinate_names=('x', 'y'))
>>> kernel_ast = create_indexed_kernel(NodeCollection([assignment]), config=kernel_config)
>>> kernel = kernel_ast.compile()
>>> d_arr = np.zeros([5, 5])
>>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
>>> d_arr
......@@ -182,42 +223,37 @@ def create_indexed_kernel(assignments,
[0. , 0. , 4.2, 0. , 0. ],
[0. , 0. , 0. , 4.3, 0. ],
[0. , 0. , 0. , 0. , 0. ]])
"""
if isinstance(assignments, Assignment):
assignments = [assignments]
elif isinstance(assignments, AssignmentCollection):
assignments = assignments.all_assignments
if target == 'cpu':
from pystencils.cpu import create_indexed_kernel
from pystencils.cpu import add_openmp
ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
coordinate_names=coordinate_names)
if cpu_openmp:
add_openmp(ast, num_threads=cpu_openmp)
return ast
elif target == 'llvm':
raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
elif target == 'gpu' or target == 'opencl':
from pystencils.gpucuda import created_indexed_cuda_kernel
idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
ast = created_indexed_cuda_kernel(assignments,
index_fields,
type_info=data_type,
coordinate_names=coordinate_names,
indexing_creator=idx_creator,
use_textures_for_interpolation=use_textures_for_interpolation)
if target == 'opencl':
from pystencils.opencl.opencljit import make_python_function
ast._backend = 'opencl'
ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
ast._target = 'opencl'
ast._backend = 'opencl'
return ast
else:
raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
# --- eval
assignments.evaluate_terms()
# FUTURE WORK from here we shouldn't NEED sympy
# --- check constrains
check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
check_double_write_condition=not config.allow_double_writes)
check.visit(assignments)
assignments.bound_fields = check.fields_written
assignments.rhs_fields = check.fields_read
ast = None
if config.target == Target.CPU and config.backend == Backend.C:
from pystencils.cpu import add_openmp, create_indexed_kernel
ast = create_indexed_kernel(assignments, config=config)
if config.cpu_openmp:
add_openmp(ast, num_threads=config.cpu_openmp)
elif config.target == Target.GPU:
if config.backend == Backend.CUDA:
from pystencils.gpu import created_indexed_cuda_kernel
ast = created_indexed_cuda_kernel(assignments, config=config)
if not ast:
raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')
return ast
def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
"""Kernel that updates a staggered field.
.. image:: /img/staggered_grid.svg
......@@ -231,7 +267,7 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
used, but they all need to use the same stencil (i.e. the same number of staggered points) and
shape.
target: 'cpu', 'llvm' or 'gpu'
target: 'CPU' or 'GPU'
gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
handled in an else branch.
kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed
......@@ -239,7 +275,16 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
Returns:
AST, see `create_kernel`
"""
assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs and 'omp_single_loop' not in kwargs
# TODO: Add doku like in the other kernels
if 'ghost_layers' in kwargs:
assert kwargs['ghost_layers'] is None
del kwargs['ghost_layers']
if 'iteration_slice' in kwargs:
assert kwargs['iteration_slice'] is None
del kwargs['iteration_slice']
if 'omp_single_loop' in kwargs:
assert kwargs['omp_single_loop'] is False
del kwargs['omp_single_loop']
if isinstance(assignments, AssignmentCollection):
subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
......@@ -317,7 +362,6 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
inner_assignment = []
for assignment in assignments:
direction = stencil[assignment.lhs.index[0]]
inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
Block(inner_assignment), outer_assignment)
......@@ -325,11 +369,8 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
[SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
[last_conditional]
if target == 'cpu':
from pystencils.cpu import create_kernel as create_kernel_cpu
ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
else:
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
config = CreateKernelConfig(target=target, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
ast = create_kernel(final_assignments, config=config)
return ast
for assignment in assignments:
......@@ -343,6 +384,11 @@ def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=
remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
move_constants_before_loop]
ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
cpu_prepend_optimizations=prepend_optimizations, **kwargs)
if 'cpu_prepend_optimizations' in kwargs:
prepend_optimizations += kwargs['cpu_prepend_optimizations']
del kwargs['cpu_prepend_optimizations']
config = CreateKernelConfig(ghost_layers=ghost_layers, target=target, omp_single_loop=False,
cpu_prepend_optimizations=prepend_optimizations, **kwargs)
ast = create_kernel(final_assignments, config=config)
return ast
from typing import Any, Dict, List, Union, Optional, Set
import sympy
import sympy as sp
from sympy.codegen.rewriting import ReplaceOptim, optimize
from pystencils.assignment import Assignment, AddAugmentedAssignment
import pystencils.astnodes as ast
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.functions import DivFunc
from pystencils.simp import AssignmentCollection
from pystencils.typing import FieldPointerSymbol
class NodeCollection:
def __init__(self, assignments: List[Union[ast.Node, Assignment]],
simplification_hints: Optional[Dict[str, Any]] = None,
bound_fields: Set[sp.Symbol] = None, rhs_fields: Set[sp.Symbol] = None):
def visit(obj):
if isinstance(obj, (list, tuple)):
return [visit(e) for e in obj]
if isinstance(obj, Assignment):
if isinstance(obj.lhs, FieldPointerSymbol):
return ast.SympyAssignment(obj.lhs, obj.rhs, is_const=obj.lhs.dtype.const)
return ast.SympyAssignment(obj.lhs, obj.rhs)
elif isinstance(obj, AddAugmentedAssignment):
return ast.SympyAssignment(obj.lhs, obj.lhs + obj.rhs)
elif isinstance(obj, ast.SympyAssignment):
return obj
elif isinstance(obj, ast.Conditional):
true_block = visit(obj.true_block)
false_block = None if obj.false_block is None else visit(obj.false_block)
return ast.Conditional(obj.condition_expr, true_block=true_block, false_block=false_block)
elif isinstance(obj, ast.Block):
return ast.Block([visit(e) for e in obj.args])
elif isinstance(obj, ast.Node) and not isinstance(obj, ast.LoopOverCoordinate):
return obj
else:
raise ValueError("Invalid object in the List of Assignments " + str(type(obj)))
self.all_assignments = visit(assignments)
self.simplification_hints = simplification_hints if simplification_hints else {}
self.bound_fields = bound_fields if bound_fields else {}
self.rhs_fields = rhs_fields if rhs_fields else {}
@staticmethod
def from_assignment_collection(assignment_collection: AssignmentCollection):
return NodeCollection(assignments=assignment_collection.all_assignments,
simplification_hints=assignment_collection.simplification_hints,
bound_fields=assignment_collection.bound_fields,
rhs_fields=assignment_collection.rhs_fields)
def evaluate_terms(self):
evaluate_constant_terms = ReplaceOptim(
lambda e: hasattr(e, 'is_constant') and e.is_constant and not e.is_integer,
lambda p: p.evalf()
)
evaluate_pow = ReplaceOptim(
lambda e: e.is_Pow and e.exp.is_Integer and abs(e.exp) <= 8,
lambda p: sp.UnevaluatedExpr(sp.Mul(*([p.base] * +p.exp), evaluate=False)) if p.exp > 0 else
(DivFunc(sp.Integer(1), p.base) if p.exp == -1 else
DivFunc(sp.Integer(1), sp.UnevaluatedExpr(sp.Mul(*([p.base] * -p.exp), evaluate=False))))
)
sympy_optimisations = [evaluate_constant_terms, evaluate_pow]
def visitor(node):
if isinstance(node, CustomCodeNode):
return node
elif isinstance(node, ast.Block):
return node.func([visitor(child) for child in node.args])
elif isinstance(node, ast.SympyAssignment):
new_lhs = visitor(node.lhs)
new_rhs = visitor(node.rhs)
return node.func(new_lhs, new_rhs, node.is_const, node.use_auto)
elif isinstance(node, ast.Node):
return node.func(*[visitor(child) for child in node.args])
elif isinstance(node, sympy.Basic):
return optimize(node, sympy_optimisations)
else:
raise NotImplementedError(f'{node} {type(node)} has no valid visitor')
self.all_assignments = [visitor(assignment) for assignment in self.all_assignments]
......@@ -34,7 +34,7 @@ def to_placeholder_function(expr, name):
"""
symbols = list(expr.atoms(sp.Symbol))
symbols.sort(key=lambda e: e.name)
derivative_symbols = [sp.Symbol("_d{}_d{}".format(name, s.name)) for s in symbols]
derivative_symbols = [sp.Symbol(f"_d{name}_d{s.name}") for s in symbols]
derivatives = [sp.diff(expr, s) for s in symbols]
assignments = [Assignment(sp.Symbol(name), expr)]
......
File moved
import copy
import numpy as np
import sympy as sp
from pystencils.typing import TypedSymbol, CastFunc
from pystencils.astnodes import LoopOverCoordinate
from pystencils.backends.cbackend import CustomCodeNode
from pystencils.sympyextensions import fast_subs
class RNGBase(CustomCodeNode):
id = 0
def __init__(self, dim, time_step=TypedSymbol("time_step", np.uint32), offsets=None, keys=None):
if keys is None:
keys = (0,) * self._num_keys
if offsets is None:
offsets = (0,) * dim
if len(keys) != self._num_keys:
raise ValueError(f"Provided {len(keys)} keys but need {self._num_keys}")
if len(offsets) != dim:
raise ValueError(f"Provided {len(offsets)} offsets but need {dim}")
coordinates = [LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i] for i in range(dim)]
if dim < 3:
coordinates.append(0)
self._args = sp.sympify([time_step, *coordinates, *keys])
self.result_symbols = tuple(TypedSymbol(f'random_{self.id}_{i}', self._data_type)
for i in range(self._num_vars))
symbols_read = set.union(*[s.atoms(sp.Symbol) for s in self.args])
super().__init__("", symbols_read=symbols_read, symbols_defined=self.result_symbols)
self.headers = [f'"{self._name.split("_")[0]}_rand.h"']
RNGBase.id += 1
@property
def args(self):
return self._args
def fast_subs(self, subs_dict, skip):
rng = copy.deepcopy(self)
rng._args = [fast_subs(a, subs_dict, skip) for a in rng._args]
return rng
def get_code(self, dialect, vector_instruction_set, print_arg):
code = "\n"
for r in self.result_symbols:
if vector_instruction_set and not self.args[1].atoms(CastFunc):
# this vector RNG has become scalar through substitution
code += f"{r.dtype} {r.name};\n"
else:
code += f"{vector_instruction_set[r.dtype.c_name] if vector_instruction_set else r.dtype} " + \
f"{r.name};\n"
args = [print_arg(a) for a in self.args] + ['' + r.name for r in self.result_symbols]
code += (self._name + "(" + ", ".join(args) + ");\n")
return code
def __repr__(self):
return ", ".join([str(s) for s in self.result_symbols]) + " \\leftarrow " + \
self._name.capitalize() + "_RNG(" + ", ".join([str(a) for a in self.args]) + ")"
def _hashable_content(self):
return (self._name, *self.result_symbols, *self.args)
def __eq__(self, other):
return type(self) is type(other) and self._hashable_content() == other._hashable_content()
def __hash__(self):
return hash(self._hashable_content())
class PhiloxTwoDoubles(RNGBase):
_name = "philox_double2"
_data_type = np.float64
_num_vars = 2
_num_keys = 2
class PhiloxFourFloats(RNGBase):
_name = "philox_float4"
_data_type = np.float32
_num_vars = 4
_num_keys = 2
class AESNITwoDoubles(RNGBase):
_name = "aesni_double2"
_data_type = np.float64
_num_vars = 2
_num_keys = 4
class AESNIFourFloats(RNGBase):
_name = "aesni_float4"
_data_type = np.float32
_num_vars = 4
_num_keys = 4
def random_symbol(assignment_list, dim, seed=TypedSymbol("seed", np.uint32), rng_node=PhiloxTwoDoubles,
time_step=TypedSymbol("time_step", np.uint32), offsets=None):
"""Return a symbol generator for random numbers
Args:
assignment_list: the subexpressions member of an AssignmentCollection, into which helper variables assignments
will be inserted
dim: 2 or 3 for two or three spatial dimensions
seed: an integer or TypedSymbol(..., np.uint32) to seed the random number generator. If you create multiple
symbol generators, please pass them different seeds so you don't get the same stream of random numbers!
rng_node: which random number generator to use (PhiloxTwoDoubles, PhiloxFourFloats, AESNITwoDoubles,
AESNIFourFloats).
time_step: TypedSymbol(..., np.uint32) that indicates the number of the current time step
offsets: tuple of offsets (constant integers or TypedSymbol(..., np.uint32)) that give the global coordinates
of the local origin
"""
counter = 0
while True:
keys = (counter, seed) + (0,) * (rng_node._num_keys - 2)
node = rng_node(dim, keys=keys, time_step=time_step, offsets=offsets)
inserted = False
for symbol in node.result_symbols:
if not inserted:
assignment_list.insert(0, node)
inserted = True
yield symbol
counter += 1
import socket
import time
from types import MappingProxyType
from typing import Dict, Iterator, Sequence
import blitzdb
import six
from blitzdb.backends.file.backend import serializer_classes
from blitzdb.backends.file.utils import JsonEncoder
from pystencils.cpu.cpujit import get_compiler_config
from pystencils import CreateKernelConfig, Target, Backend, Field
import json
import sympy as sp
from pystencils.typing import BasicType
class PystencilsJsonEncoder(JsonEncoder):
def default(self, obj):
if isinstance(obj, CreateKernelConfig):
return obj.__dict__
if isinstance(obj, (sp.Float, sp.Rational)):
return float(obj)
if isinstance(obj, sp.Integer):
return int(obj)
if isinstance(obj, (BasicType, MappingProxyType)):
return str(obj)
if isinstance(obj, (Target, Backend, sp.Symbol)):
return obj.name
if isinstance(obj, Field):
return f"pystencils.Field(name = {obj.name}, field_type = {obj.field_type.name}, " \
f"dtype = {str(obj.dtype)}, layout = {obj.layout}, shape = {obj.shape}, " \
f"strides = {obj.strides})"
return JsonEncoder.default(self, obj)
class PystencilsJsonSerializer(object):
@classmethod
def serialize(cls, data):
if six.PY3:
if isinstance(data, bytes):
return json.dumps(data.decode('utf-8'), cls=PystencilsJsonEncoder, ensure_ascii=False).encode('utf-8')
else:
return json.dumps(data, cls=PystencilsJsonEncoder, ensure_ascii=False).encode('utf-8')
else:
return json.dumps(data, cls=PystencilsJsonEncoder, ensure_ascii=False).encode('utf-8')
@classmethod
def deserialize(cls, data):
if six.PY3:
return json.loads(data.decode('utf-8'))
else:
return json.loads(data.decode('utf-8'))
class Database:
......@@ -46,7 +96,7 @@ class Database:
class SimulationResult(blitzdb.Document):
pass
def __init__(self, file: str) -> None:
def __init__(self, file: str, serializer_info: tuple = None) -> None:
if file.startswith("mongo://"):
from pymongo import MongoClient
db_name = file[len("mongo://"):]
......@@ -57,6 +107,10 @@ class Database:
self.backend.autocommit = True
if serializer_info:
serializer_classes.update({serializer_info[0]: serializer_info[1]})
self.backend.load_config({'serializer_class': serializer_info[0]}, True)
def save(self, params: Dict, result: Dict, env: Dict = None, **kwargs) -> None:
"""Stores a simulation result in the database.
......@@ -120,7 +174,7 @@ class Database:
Returns:
pandas data frame
"""
from pandas.io.json import json_normalize
from pandas import json_normalize
query_result = self.filter_params(parameter_query)
attributes = [e.attributes for e in query_result]
......@@ -146,10 +200,15 @@ class Database:
'cpuCompilerConfig': get_compiler_config(),
}
try:
from git import Repo, InvalidGitRepositoryError
from git import Repo
except ImportError:
return result
try:
from git import InvalidGitRepositoryError
repo = Repo(search_parent_directories=True)
result['git_hash'] = str(repo.head.commit)
except (ImportError, InvalidGitRepositoryError):
except InvalidGitRepositoryError:
pass
return result
......
......@@ -9,6 +9,7 @@ from time import sleep
from typing import Any, Callable, Dict, Optional, Sequence, Tuple
from pystencils.runhelper import Database
from pystencils.runhelper.db import PystencilsJsonSerializer
from pystencils.utils import DotDict
ParameterDict = Dict[str, Any]
......@@ -54,10 +55,11 @@ class ParameterStudy:
Run = namedtuple("Run", ['parameter_dict', 'weight'])
def __init__(self, run_function: Callable[..., Dict], runs: Sequence = (),
database_connector: str = './db') -> None:
database_connector: str = './db',
serializer_info: tuple = ('pystencils_serializer', PystencilsJsonSerializer)) -> None:
self.runs = list(runs)
self.run_function = run_function
self.db = Database(database_connector)
self.db = Database(database_connector, serializer_info)
def add_run(self, parameter_dict: ParameterDict, weight: int = 1) -> None:
"""Schedule a dictionary of parameters to run in this parameter study.
......@@ -215,7 +217,7 @@ class ParameterStudy:
def log_message(self, fmt, *args):
return
print("Listening to connections on {}:{}. Scenarios to simulate: {}".format(ip, port, len(filtered_runs)))
print(f"Listening to connections on {ip}:{port}. Scenarios to simulate: {len(filtered_runs)}")
server = HTTPServer((ip, port), ParameterStudyServer)
while len(ParameterStudyServer.currently_running) > 0 or len(ParameterStudyServer.runs) > 0:
server.handle_request()
......@@ -241,7 +243,7 @@ class ParameterStudy:
from urllib.error import URLError
import time
parameter_update = {} if parameter_update is None else parameter_update
url = "http://{}:{}".format(server, port)
url = f"http://{server}:{port}"
client_name = client_name.format(hostname=socket.gethostname(), pid=os.getpid())
start_time = time.time()
while True:
......@@ -265,7 +267,7 @@ class ParameterStudy:
'client_name': client_name}
urlopen(url + '/result', data=json.dumps(answer).encode())
except URLError:
print("Cannot connect to server {} retrying in 5 seconds...".format(url))
print(f"Cannot connect to server {url} retrying in 5 seconds...")
sleep(5)
def run_from_command_line(self, argv: Optional[Sequence[str]] = None) -> None:
......
......@@ -2,8 +2,7 @@ import numpy as np
import sympy as sp
import pystencils as ps
import pystencils.jupyter
from pystencils.jupyter import make_imshow_animation, display_animation, set_display_mode
import pystencils.plot as plt
import pystencils.sympy_gmpy_bug_workaround
__all__ = ['sp', 'np', 'ps', 'plt']
__all__ = ['sp', 'np', 'ps', 'plt', 'make_imshow_animation', 'display_animation', 'set_display_mode']
from .assignment_collection import AssignmentCollection
from .simplifications import (
add_subexpressions_for_constants,
add_subexpressions_for_divisions, add_subexpressions_for_field_reads,
apply_on_all_subexpressions, apply_to_all_assignments,
add_subexpressions_for_sums, apply_on_all_subexpressions, apply_to_all_assignments,
subexpression_substitution_in_existing_subexpressions,
subexpression_substitution_in_main_assignments, sympy_cse, sympy_cse_on_assignment_list)
from .subexpression_insertion import (
insert_aliases, insert_zeros, insert_constants,
insert_constant_additions, insert_constant_multiples,
insert_squares, insert_symbol_times_minus_one)
from .simplificationstrategy import SimplificationStrategy
__all__ = ['AssignmentCollection', 'SimplificationStrategy',
'sympy_cse', 'sympy_cse_on_assignment_list', 'apply_to_all_assignments',
'apply_on_all_subexpressions', 'subexpression_substitution_in_existing_subexpressions',
'subexpression_substitution_in_main_assignments', 'add_subexpressions_for_divisions',
'add_subexpressions_for_field_reads']
'subexpression_substitution_in_main_assignments', 'add_subexpressions_for_constants',
'add_subexpressions_for_divisions', 'add_subexpressions_for_sums', 'add_subexpressions_for_field_reads',
'insert_aliases', 'insert_zeros', 'insert_constants',
'insert_constant_additions', 'insert_constant_multiples',
'insert_squares', 'insert_symbol_times_minus_one']
......@@ -6,8 +6,7 @@ import sympy as sp
import pystencils
from pystencils.assignment import Assignment
from pystencils.simp.simplifications import (
sort_assignments_topologically, transform_lhs_and_rhs, transform_rhs)
from pystencils.simp.simplifications import (sort_assignments_topologically, transform_lhs_and_rhs, transform_rhs)
from pystencils.sympyextensions import count_operations, fast_subs
......@@ -20,13 +19,14 @@ class AssignmentCollection:
Additionally a dictionary of simplification hints is stored, which are set by the functions that create
assignment collections to transport information to the simplification system.
Attributes:
main_assignments: list of assignments
subexpressions: list of assignments defining subexpressions used in main equations
simplification_hints: dict that is used to annotate the assignment collection with hints that are
Args:
main_assignments: List of assignments. Main assignments are characterised, that the right hand side of each
assignment is a field access. Thus the generated equations write on arrays.
subexpressions: List of assignments defining subexpressions used in main equations
simplification_hints: Dict that is used to annotate the assignment collection with hints that are
used by the simplification system. See documentation of the simplification rules for
potentially required hints and their meaning.
subexpression_symbol_generator: generator for new symbols that are used when new subexpressions are added
subexpression_symbol_generator: Generator for new symbols that are used when new subexpressions are added
used to get new symbols that are unique for this AssignmentCollection
"""
......@@ -34,9 +34,13 @@ class AssignmentCollection:
# ------------------------------- Creation & Inplace Manipulation --------------------------------------------------
def __init__(self, main_assignments: Union[List[Assignment], Dict[sp.Expr, sp.Expr]],
subexpressions: Union[List[Assignment], Dict[sp.Expr, sp.Expr]] = {},
subexpressions: Union[List[Assignment], Dict[sp.Expr, sp.Expr]] = None,
simplification_hints: Optional[Dict[str, Any]] = None,
subexpression_symbol_generator: Iterator[sp.Symbol] = None) -> None:
if subexpressions is None:
subexpressions = {}
if isinstance(main_assignments, Dict):
main_assignments = [Assignment(k, v)
for k, v in main_assignments.items()]
......@@ -57,8 +61,11 @@ class AssignmentCollection:
self.simplification_hints = simplification_hints
ctrs = [int(n.name[3:])for n in self.rhs_symbols if "xi_" in n.name]
max_ctr = max(ctrs) + 1 if len(ctrs) > 0 else 0
if subexpression_symbol_generator is None:
self.subexpression_symbol_generator = SymbolGen()
self.subexpression_symbol_generator = SymbolGen(ctr=max_ctr)
else:
self.subexpression_symbol_generator = subexpression_symbol_generator
......@@ -103,16 +110,21 @@ class AssignmentCollection:
return self.subexpressions + self.main_assignments
@property
def free_symbols(self) -> Set[sp.Symbol]:
"""All symbols used in the assignment collection, which do not occur as left hand sides in any assignment."""
free_symbols = set()
def rhs_symbols(self) -> Set[sp.Symbol]:
"""All symbols used in the assignment collection, which occur on the rhs of any assignment."""
rhs_symbols = set()
for eq in self.all_assignments:
if isinstance(eq, Assignment):
free_symbols.update(eq.rhs.atoms(sp.Symbol))
rhs_symbols.update(eq.rhs.atoms(sp.Symbol))
elif isinstance(eq, pystencils.astnodes.Node):
free_symbols.update(eq.undefined_symbols)
rhs_symbols.update(eq.undefined_symbols)
return free_symbols - self.bound_symbols
return rhs_symbols
@property
def free_symbols(self) -> Set[sp.Symbol]:
"""All symbols used in the assignment collection, which do not occur as left hand sides in any assignment."""
return self.rhs_symbols - self.bound_symbols
@property
def bound_symbols(self) -> Set[sp.Symbol]:
......@@ -127,11 +139,15 @@ class AssignmentCollection:
bound_symbols_set = bound_symbols_set.union(*[
assignment.symbols_defined for assignment in self.all_assignments
if isinstance(assignment, pystencils.astnodes.Node)
]
)
])
return bound_symbols_set
@property
def rhs_fields(self):
"""All fields accessed in the assignment collection, which do not occur as left hand sides in any assignment."""
return {s.field for s in self.rhs_symbols if hasattr(s, 'field')}
@property
def free_fields(self):
"""All fields accessed in the assignment collection, which do not occur as left hand sides in any assignment."""
......@@ -145,11 +161,9 @@ class AssignmentCollection:
@property
def defined_symbols(self) -> Set[sp.Symbol]:
"""All symbols which occur as left-hand-sides of one of the main equations"""
return (set(
[assignment.lhs for assignment in self.main_assignments if isinstance(assignment, Assignment)]
).union(*[assignment.symbols_defined for assignment in self.main_assignments if isinstance(
assignment, pystencils.astnodes.Node)]
))
lhs_set = set([assignment.lhs for assignment in self.main_assignments if isinstance(assignment, Assignment)])
return (lhs_set.union(*[assignment.symbols_defined for assignment in self.main_assignments
if isinstance(assignment, pystencils.astnodes.Node)]))
@property
def operation_count(self):
......@@ -210,6 +224,7 @@ class AssignmentCollection:
return {s: func(*args, **kwargs) for s, func in lambdas.items()}
return f
# ---------------------------- Creating new modified collections ---------------------------------------------------
def copy(self,
......@@ -263,7 +278,7 @@ class AssignmentCollection:
own_definitions = set([e.lhs for e in self.main_assignments])
other_definitions = set([e.lhs for e in other.main_assignments])
assert len(own_definitions.intersection(other_definitions)) == 0, \
"Cannot new_merged, since both collection define the same symbols"
"Cannot merge collections, since both define the same symbols"
own_subexpression_symbols = {e.lhs: e.rhs for e in self.subexpressions}
substitution_dict = {}
......@@ -271,12 +286,13 @@ class AssignmentCollection:
processed_other_subexpression_equations = []
for other_subexpression_eq in other.subexpressions:
if other_subexpression_eq.lhs in own_subexpression_symbols:
if other_subexpression_eq.rhs == own_subexpression_symbols[other_subexpression_eq.lhs]:
new_rhs = fast_subs(other_subexpression_eq.rhs, substitution_dict)
if new_rhs == own_subexpression_symbols[other_subexpression_eq.lhs]:
continue # exact the same subexpression equation exists already
else:
# different definition - a new name has to be introduced
new_lhs = next(self.subexpression_symbol_generator)
new_eq = Assignment(new_lhs, fast_subs(other_subexpression_eq.rhs, substitution_dict))
new_eq = Assignment(new_lhs, new_rhs)
processed_other_subexpression_equations.append(new_eq)
substitution_dict[other_subexpression_eq.lhs] = new_lhs
else:
......@@ -299,9 +315,9 @@ class AssignmentCollection:
if eq.lhs in symbols_to_extract:
new_assignments.append(eq)
new_sub_expr = [eq for eq in self.subexpressions
new_sub_expr = [eq for eq in self.all_assignments
if eq.lhs in dependent_symbols and eq.lhs not in symbols_to_extract]
return AssignmentCollection(new_assignments, new_sub_expr)
return self.copy(new_assignments, new_sub_expr)
def new_without_unused_subexpressions(self) -> 'AssignmentCollection':
"""Returns new collection that only contains subexpressions required to compute the main assignments."""
......@@ -324,8 +340,10 @@ class AssignmentCollection:
new_eqs = [Assignment(eq.lhs, fast_subs(eq.rhs, subs_dict)) for eq in self.main_assignments]
return self.copy(new_eqs, new_subexpressions)
def new_without_subexpressions(self, subexpressions_to_keep: Set[sp.Symbol] = set()) -> 'AssignmentCollection':
def new_without_subexpressions(self, subexpressions_to_keep=None) -> 'AssignmentCollection':
"""Returns a new collection where all subexpressions have been inserted."""
if subexpressions_to_keep is None:
subexpressions_to_keep = set()
if len(self.subexpressions) == 0:
return self.copy()
......@@ -334,7 +352,7 @@ class AssignmentCollection:
kept_subexpressions = []
if self.subexpressions[0].lhs in subexpressions_to_keep:
substitution_dict = {}
kept_subexpressions = self.subexpressions[0]
kept_subexpressions.append(self.subexpressions[0])
else:
substitution_dict = {self.subexpressions[0].lhs: self.subexpressions[0].rhs}
......@@ -353,6 +371,7 @@ class AssignmentCollection:
def _repr_html_(self):
"""Interface to Jupyter notebook, to display as a nicely formatted HTML table"""
def make_html_equation_table(equations):
no_border = 'style="border:none"'
html_table = '<table style="border:none; width: 100%; ">'
......@@ -378,10 +397,10 @@ class AssignmentCollection:
def __str__(self):
result = "Subexpressions:\n"
for eq in self.subexpressions:
result += "\t{eq}\n".format(eq=eq)
result += f"\t{eq}\n"
result += "Main Assignments:\n"
for eq in self.main_assignments:
result += "\t{eq}\n".format(eq=eq)
result += f"\t{eq}\n"
return result
def __iter__(self):
......@@ -438,14 +457,17 @@ class AssignmentCollection:
class SymbolGen:
"""Default symbol generator producing number symbols ζ_0, ζ_1, ..."""
def __init__(self, symbol="xi"):
self._ctr = 0
def __init__(self, symbol="xi", dtype=None, ctr=0):
self._ctr = ctr
self._symbol = symbol
self._dtype = dtype
def __iter__(self):
return self
def __next__(self):
name = "{}_{}".format(self._symbol, self._ctr)
name = f"{self._symbol}_{self._ctr}"
self._ctr += 1
if self._dtype is not None:
return pystencils.TypedSymbol(name, self._dtype)
return sp.Symbol(name)
from itertools import chain
from typing import Callable, List, Sequence, Union
from collections import defaultdict
import sympy as sp
from pystencils.assignment import Assignment
from pystencils.astnodes import Node
from pystencils.field import AbstractField, Field
from pystencils.sympyextensions import subs_additive
from pystencils.field import Field
from pystencils.sympyextensions import subs_additive, is_constant, recursive_collect
from pystencils.typing import TypedSymbol
def sort_assignments_topologically(assignments: Sequence[Union[Assignment, Node]]) -> List[Union[Assignment, Node]]:
......@@ -18,7 +20,7 @@ def sort_assignments_topologically(assignments: Sequence[Union[Assignment, Node]
elif isinstance(e1, Node):
symbols = e1.symbols_defined
else:
raise NotImplementedError("Cannot sort topologically. Object of type " + type(e1) + " cannot be handled.")
raise NotImplementedError(f"Cannot sort topologically. Object of type {type(e1)} cannot be handled.")
for lhs in symbols:
for c2, e2 in enumerate(assignments):
......@@ -83,6 +85,39 @@ def subexpression_substitution_in_main_assignments(ac):
return ac.copy(result)
def add_subexpressions_for_constants(ac):
"""Extracts constant factors to subexpressions in the given assignment collection.
SymPy will exclude common factors from a sum only if they are symbols. This simplification
can be applied to exclude common numeric constants from multiple terms of a sum. As a consequence,
the number of multiplications is reduced and in some cases, more common subexpressions can be found.
"""
constants_to_subexp_dict = defaultdict(lambda: next(ac.subexpression_symbol_generator))
def visit(expr):
args = list(expr.args)
if len(args) == 0:
return expr
if isinstance(expr, sp.Add) or isinstance(expr, sp.Mul):
for i, arg in enumerate(args):
if is_constant(arg) and abs(arg) != 1:
if arg < 0:
args[i] = - constants_to_subexp_dict[- arg]
else:
args[i] = constants_to_subexp_dict[arg]
return expr.func(*(visit(a) for a in args))
main_assignments = [Assignment(a.lhs, visit(a.rhs)) for a in ac.main_assignments]
subexpressions = [Assignment(a.lhs, visit(a.rhs)) for a in ac.subexpressions]
symbols_to_collect = set(constants_to_subexp_dict.values())
main_assignments = [Assignment(a.lhs, recursive_collect(a.rhs, symbols_to_collect, True)) for a in main_assignments]
subexpressions = [Assignment(a.lhs, recursive_collect(a.rhs, symbols_to_collect, True)) for a in subexpressions]
subexpressions = [Assignment(symb, c) for c, symb in constants_to_subexp_dict.items()] + subexpressions
return ac.copy(main_assignments=main_assignments, subexpressions=subexpressions)
def add_subexpressions_for_divisions(ac):
r"""Introduces subexpressions for all divisions which have no constant in the denominator.
......@@ -112,14 +147,14 @@ def add_subexpressions_for_sums(ac):
addends = []
def contains_sum(term):
if term.func == sp.add.Add:
if term.func == sp.Add:
return True
if term.is_Atom:
return False
return any([contains_sum(a) for a in term.args])
def search_addends(term):
if term.func == sp.add.Add:
if term.func == sp.Add:
if all([not contains_sum(a) for a in term.args]):
addends.extend(term.args)
for a in term.args:
......@@ -128,18 +163,20 @@ def add_subexpressions_for_sums(ac):
for eq in ac.all_assignments:
search_addends(eq.rhs)
addends = [a for a in addends if not isinstance(a, sp.Symbol) or isinstance(a, AbstractField.AbstractAccess)]
addends = [a for a in addends if not isinstance(a, sp.Symbol) or isinstance(a, Field.Access)]
new_symbol_gen = ac.subexpression_symbol_generator
substitutions = {addend: new_symbol for new_symbol, addend in zip(new_symbol_gen, addends)}
return ac.new_with_substitutions(substitutions, True, substitute_on_lhs=False)
def add_subexpressions_for_field_reads(ac, subexpressions=True, main_assignments=True):
def add_subexpressions_for_field_reads(ac, subexpressions=True, main_assignments=True, data_type=None):
r"""Substitutes field accesses on rhs of assignments with subexpressions
Can change semantics of the update rule (which is the goal of this transformation)
This is useful if a field should be update in place - all values are loaded before into subexpression variables,
then the new values are computed and written to the same field in-place.
Additionally, if a datatype is given to the function the rhs symbol of the new isolated field read will have
this data type. This is useful for mixed precision kernels
"""
field_reads = set()
to_iterate = []
......@@ -151,7 +188,17 @@ def add_subexpressions_for_field_reads(ac, subexpressions=True, main_assignments
for assignment in to_iterate:
if hasattr(assignment, 'lhs') and hasattr(assignment, 'rhs'):
field_reads.update(assignment.rhs.atoms(Field.Access))
substitutions = {fa: next(ac.subexpression_symbol_generator) for fa in field_reads}
if not field_reads:
return ac
substitutions = dict()
for fa in field_reads:
lhs = next(ac.subexpression_symbol_generator)
if data_type is not None:
substitutions.update({fa: TypedSymbol(lhs.name, data_type)})
else:
substitutions.update({fa: lhs})
return ac.new_with_substitutions(substitutions, add_substitutions_as_subexpressions=True,
substitute_on_lhs=False, sort_topologically=False)
......@@ -172,7 +219,7 @@ def transform_lhs_and_rhs(assignment_list, transformation, *args, **kwargs):
def apply_to_all_assignments(operation: Callable[[sp.Expr], sp.Expr]):
"""Applies sympy expand operation to all equations in collection."""
"""Applies a given operation to all equations in collection."""
def f(ac):
return ac.copy(transform_rhs(ac.main_assignments, operation))
......@@ -189,3 +236,31 @@ def apply_on_all_subexpressions(operation: Callable[[sp.Expr], sp.Expr]):
f.__name__ = operation.__name__
return f
# TODO Markus
# make this really work for Assignmentcollections
# this function should ONLY evaluate
# do the optims_c99 elsewhere optionally
# def apply_sympy_optimisations(ac: AssignmentCollection):
# """ Evaluates constant expressions (e.g. :math:`\\sqrt{3}` will be replaced by its floating point representation)
# and applies the default sympy optimisations. See sympy.codegen.rewriting
# """
#
# # Evaluates all constant terms
#
# assignments = ac.all_assignments
#
# evaluate_constant_terms = ReplaceOptim(lambda e: hasattr(e, 'is_constant') and e.is_constant and not e.is_integer,
# lambda p: p.evalf())
#
# sympy_optimisations = [evaluate_constant_terms] + list(optims_c99)
#
# assignments = [Assignment(a.lhs, optimize(a.rhs, sympy_optimisations))
# if hasattr(a, 'lhs')
# else a for a in assignments]
# assignments_nodes = [a.atoms(SympyAssignment) for a in assignments]
# for a in chain.from_iterable(assignments_nodes):
# a.optimize(sympy_optimisations)
#
# return AssignmentCollection(assignments)