Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 1774 additions and 354 deletions
import warnings import warnings
from typing import Container, Union from typing import Container, Union
import numpy as np
import sympy as sp import sympy as sp
from sympy.logic.boolalg import BooleanFunction, BooleanAtom
import pystencils.astnodes as ast import pystencils.astnodes as ast
from pystencils.backends.simd_instruction_sets import get_vector_instruction_set from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.data_types import ( from pystencils.typing import (BasicType, PointerType, TypedSymbol, VectorType, CastFunc, collate_types,
PointerType, TypedSymbol, VectorType, cast_func, collate_types, get_type_of_expression, get_type_of_expression, VectorMemoryAccess)
vector_memory_access) from pystencils.functions import DivFunc
from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
from pystencils.field import Field from pystencils.field import Field
from pystencils.integer_functions import modulo_ceil, modulo_floor from pystencils.integer_functions import modulo_ceil, modulo_floor
from pystencils.sympyextensions import fast_subs from pystencils.sympyextensions import fast_subs
from pystencils.transformations import ( from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
cut_loop, filtered_tree_iteration, replace_inner_stride_with_one)
# noinspection PyPep8Naming # noinspection PyPep8Naming
class vec_any(sp.Function): class vec_any(sp.Function):
nargs = (1, ) nargs = (1,)
# noinspection PyPep8Naming # noinspection PyPep8Naming
class vec_all(sp.Function): class vec_all(sp.Function):
nargs = (1, ) nargs = (1,)
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', class NontemporalFence(ast.Node):
def __init__(self):
super(NontemporalFence, self).__init__(parent=None)
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
@property
def args(self):
return []
def __eq__(self, other):
return isinstance(other, NontemporalFence)
class CachelineSize(ast.Node):
    """Marker node that declares the cache-line related helper symbols of a kernel.

    The node has no children and no free symbols; it only reports three symbols
    as defined. All instances compare equal and hash identically, so an
    "insert if not already present" check treats any two of them as the same node.
    """

    # Symbols reported as defined by this node. Judging by their names they hold
    # the cache line size, a mask derived from it and the last vector of a cache
    # line -- NOTE(review): confirm against the backend that prints this node.
    symbol = sp.Symbol("_clsize")
    mask_symbol = sp.Symbol("_clsize_mask")
    last_symbol = sp.Symbol("_cl_lastvec")

    def __init__(self):
        # created detached; the parent is not set here
        super().__init__(parent=None)

    @property
    def symbols_defined(self):
        """Set containing the three cache-line helper symbols."""
        return set((self.symbol, self.mask_symbol, self.last_symbol))

    @property
    def undefined_symbols(self):
        """Always empty: the node reads no symbols."""
        return set()

    @property
    def args(self):
        """Always an empty list: the node has no child nodes."""
        return list()

    def __eq__(self, other):
        # instances carry no state of their own, so the type alone decides equality
        return isinstance(other, CachelineSize)

    def __hash__(self):
        # consistent with __eq__: every instance hashes to the hash of the shared symbol
        return hash(self.symbol)
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False, assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True): assume_inner_stride_one: bool = False, assume_sufficient_line_padding: bool = True):
# TODO Vectorization Revamp we first introduce the remainder loop and then check if we can even vectorise.
# Maybe first copy the ast and return the copied version on failure
"""Explicit vectorization using SIMD vectorization via intrinsics. """Explicit vectorization using SIMD vectorization via intrinsics.
Args: Args:
...@@ -51,9 +100,14 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', ...@@ -51,9 +100,14 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
depending on the access pattern there might be additional padding depending on the access pattern there might be additional padding
required at the end of the array required at the end of the array
""" """
if instruction_set == 'best':
if get_supported_instruction_sets():
instruction_set = get_supported_instruction_sets()[-1]
else:
instruction_set = 'avx'
if instruction_set is None: if instruction_set is None:
return return
all_fields = kernel_ast.fields_accessed all_fields = kernel_ast.fields_accessed
if nontemporal is None or nontemporal is False: if nontemporal is None or nontemporal is False:
nontemporal = {} nontemporal = {}
...@@ -69,39 +123,53 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', ...@@ -69,39 +123,53 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
"to differently typed floating point fields") "to differently typed floating point fields")
float_size = field_float_dtypes.pop().numpy_dtype.itemsize float_size = field_float_dtypes.pop().numpy_dtype.itemsize
assert float_size in (8, 4) assert float_size in (8, 4)
vector_is = get_vector_instruction_set('double' if float_size == 8 else 'float', default_float_type = 'float64' if float_size == 8 else 'float32'
instruction_set=instruction_set) vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
vector_width = vector_is['width']
kernel_ast.instruction_set = vector_is kernel_ast.instruction_set = vector_is
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, vector_width, assume_aligned, if nontemporal and 'cachelineZero' in vector_is:
nontemporal, assume_sufficient_line_padding) kernel_ast.use_all_written_field_sizes = True
insert_vector_casts(kernel_ast) strided = 'storeS' in vector_is and 'loadS' in vector_is
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned and 'storeA' in vector_is else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type)
def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields, def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontemporal_fields,
assume_sufficient_line_padding): strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type):
"""Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type.""" """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment) all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
inner_loops = [n for n in all_loops if n.is_innermost_loop] inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
zero_loop_counters = {l.loop_counter_symbol: 0 for l in all_loops} zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
vector_is = ast_node.instruction_set
assert vector_is, "The ast needs to hold information about the instruction_set for the vectorisation"
vector_width = vector_is['width']
vector_int_width = vector_is['intwidth']
for loop_node in inner_loops: for loop_node in inner_loops:
loop_range = loop_node.stop - loop_node.start loop_range = loop_node.stop - loop_node.start
# cut off loop tail, that is not a multiple of four # cut off loop tail, that is not a multiple of four
if assume_aligned and assume_sufficient_line_padding: if keep_loop_stop:
pass
elif assume_aligned and assume_sufficient_line_padding:
loop_range = loop_node.stop - loop_node.start loop_range = loop_node.stop - loop_node.start
new_stop = loop_node.start + modulo_ceil(loop_range, vector_width) new_stop = loop_node.start + modulo_ceil(loop_range, vector_width)
loop_node.stop = new_stop loop_node.stop = new_stop
else: else:
cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start cutting_point = modulo_floor(loop_range, vector_width) + loop_node.start
loop_nodes = [l for l in cut_loop(loop_node, [cutting_point]).args if isinstance(l, ast.LoopOverCoordinate)] # TODO cut_loop calls deepcopy on the loop_node. This is bad as documented in cut_loop
loop_nodes = [loop for loop in cut_loop(loop_node, [cutting_point]).args
if isinstance(loop, ast.LoopOverCoordinate)]
assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width assert len(loop_nodes) in (0, 1, 2) # 2 for main and tail loop, 1 if loop range divisible by vector width
if len(loop_nodes) == 0: if len(loop_nodes) == 0:
continue continue
loop_node = loop_nodes[0] loop_node = loop_nodes[0]
# loop_node is the vectorized one
# Find all array accesses (indexed) that depend on the loop counter as offset # Find all array accesses (indexed) that depend on the loop counter as offset
loop_counter_symbol = ast.LoopOverCoordinate.get_loop_counter_symbol(loop_node.coordinate_to_loop_over) loop_counter_symbol = ast.LoopOverCoordinate.get_loop_counter_symbol(loop_node.coordinate_to_loop_over)
substitutions = {} substitutions = {}
...@@ -109,54 +177,184 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a ...@@ -109,54 +177,184 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a
for indexed in loop_node.atoms(sp.Indexed): for indexed in loop_node.atoms(sp.Indexed):
base, index = indexed.args base, index = indexed.args
if loop_counter_symbol in index.atoms(sp.Symbol): if loop_counter_symbol in index.atoms(sp.Symbol):
if 'loadA' not in vector_is and 'storeA' not in vector_is and 'maskStoreA' not in vector_is:
# don't need to generate the alignment check when there are no aligned load/store instructions
aligned_access = False
else:
if not isinstance(vector_width, int):
raise NotImplementedError('Access alignment cannot be statically determined for sizeless '
'vector ISAs')
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms() loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0 stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
if not loop_counter_is_offset: if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
successful = False successful = False
break break
typed_symbol = base.label typed_symbol = base.label
assert type(typed_symbol.dtype) is PointerType, \ assert type(typed_symbol.dtype) is PointerType, f"Type of access is {typed_symbol.dtype}, {indexed}"
"Type of access is {}, {}".format(typed_symbol.dtype, indexed)
vec_type = VectorType(typed_symbol.dtype.base_type, vector_width) vec_type = VectorType(typed_symbol.dtype.base_type, vector_width)
use_aligned_access = aligned_access and assume_aligned use_aligned_access = aligned_access and assume_aligned
nontemporal = False nontemporal = False
if hasattr(indexed, 'field'): if hasattr(indexed, 'field'):
nontemporal = (indexed.field in nontemporal_fields) or (indexed.field.name in nontemporal_fields) nontemporal = (indexed.field in nontemporal_fields) or (indexed.field.name in nontemporal_fields)
substitutions[indexed] = vector_memory_access(indexed, vec_type, use_aligned_access, nontemporal) substitutions[indexed] = VectorMemoryAccess(indexed, vec_type, use_aligned_access, nontemporal, True,
stride if strided else 1)
if nontemporal:
# insert NontemporalFence after the outermost loop
parent = loop_node.parent
while type(parent.parent.parent) is not ast.KernelFunction:
parent = parent.parent
parent.parent.insert_after(NontemporalFence(), parent, if_not_exists=True)
# insert CachelineSize at the beginning of the kernel
parent.parent.insert_front(CachelineSize(), if_not_exists=True)
if not successful: if not successful:
warnings.warn("Could not vectorize loop because of non-consecutive memory access") warnings.warn("Could not vectorize loop because of non-consecutive memory access")
continue continue
loop_node.step = vector_width loop_node.step = vector_width
loop_node.subs(substitutions) loop_node.subs(substitutions)
arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
VectorType(loop_counter_symbol.dtype, vector_int_width))
vector_loop_counter = arg_1 + arg_2
fast_subs(loop_node, {loop_counter_symbol: vector_loop_counter},
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess) or isinstance(e, VectorMemoryAccess))
mask_conditionals(loop_node)
def insert_vector_casts(ast_node): from pystencils.rng import RNGBase
substitutions = {}
for rng in loop_node.atoms(RNGBase):
new_result_symbols = [TypedSymbol(s.name, VectorType(s.dtype, width=vector_width))
for s in rng.result_symbols]
substitutions.update({s[0]: s[1] for s in zip(rng.result_symbols, new_result_symbols)})
rng._symbols_defined = set(new_result_symbols)
fast_subs(loop_node, substitutions, skip=lambda e: isinstance(e, RNGBase))
insert_vector_casts(loop_node, vector_is, default_float_type)
def mask_conditionals(loop_body):
    """Fold the conditions of conditionals inside a loop into the masks of the guarded memory accesses.

    The tree below ``loop_body`` is walked recursively. While descending through
    ``ast.Conditional`` nodes a mask expression is accumulated; every
    ``VectorMemoryAccess`` found in an ``ast.SympyAssignment`` below gets its fifth
    argument replaced by ``And(mask, <previous fifth argument>)``. Conditions that
    depend on the loop counter are additionally wrapped in ``vec_any``.

    Args:
        loop_body: node providing ``loop_counter_symbol`` and ``args``; it is modified in place.
    """
    def visit_node(node, mask):
        if isinstance(node, ast.Conditional):
            cond = node.condition_expr
            # A condition is not turned into a mask if it does not contain the loop
            # counter or is already wrapped in vec_all / vec_any.
            skip = (loop_body.loop_counter_symbol not in cond.atoms(sp.Symbol)) or cond.func in (vec_all, vec_any)
            cond = True if skip else cond

            true_mask = sp.And(cond, mask)
            visit_node(node.true_block, true_mask)
            if node.false_block:
                false_mask = sp.And(sp.Not(node.condition_expr), mask)
                # Descend into the false block. Recursing on `node` itself would
                # re-enter this branch forever (false_block stays truthy) and would
                # never apply the negated mask to the false block.
                visit_node(node.false_block, false_mask)
            if not skip:
                node.condition_expr = vec_any(node.condition_expr)
        elif isinstance(node, ast.SympyAssignment):
            # mask is the Python literal True only when no masking condition was collected
            if mask is not True:
                s = {ma: VectorMemoryAccess(*ma.args[0:4], sp.And(mask, ma.args[4]), *ma.args[5:])
                     for ma in node.atoms(VectorMemoryAccess)}
                node.subs(s)
        else:
            for arg in node.args:
                visit_node(arg, mask)

    visit_node(loop_body, mask=True)
def insert_vector_casts(ast_node, instruction_set, default_float_type='double'):
"""Inserts necessary casts from scalar values to vector values.""" """Inserts necessary casts from scalar values to vector values."""
handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all) handled_functions = (sp.Add, sp.Mul, vec_any, vec_all, DivFunc, sp.Abs)
def visit_expr(expr): def is_scalar(expr) -> bool:
if hasattr(expr, "dtype"):
if type(expr.dtype) is VectorType:
return False
# Else branch: If expr is a CastFunc, then whether the expression
# is scalar is determined by the argument (remember: vector casts
# are not inserted yet). Therefore, we must recurse into the args of
# expr below. Otherwise, this expression is atomic and in that case
# it is assumed to be scalar below.
if isinstance(expr, cast_func) or isinstance(expr, vector_memory_access): if isinstance(expr, ast.ResolvedFieldAccess):
return expr # expr.field is not in expr.args
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, sp.boolalg.BooleanFunction): return is_scalar(expr.field)
new_args = [visit_expr(a) for a in expr.args] elif isinstance(expr, (vec_any, vec_all)):
arg_types = [get_type_of_expression(a) for a in new_args] return True
if not hasattr(expr, "args"):
return True
return all(is_scalar(arg) for arg in expr.args)
# TODO Vectorization Revamp: get rid of default_type
def visit_expr(expr, default_type='double', force_vectorize=False):
if isinstance(expr, VectorMemoryAccess):
return VectorMemoryAccess(*expr.args[0:4], visit_expr(expr.args[4], default_type, force_vectorize),
*expr.args[5:])
elif isinstance(expr, CastFunc):
cast_type = expr.args[1]
arg = visit_expr(expr.args[0], default_type, force_vectorize)
assert cast_type in [BasicType('float32'), BasicType('float64')], \
f'Vectorization cannot vectorize type {cast_type}'
return expr.func(arg, VectorType(cast_type, instruction_set['width']))
elif expr.func is sp.Abs and 'abs' not in instruction_set:
new_arg = visit_expr(expr.args[0], default_type, force_vectorize)
base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is VectorMemoryAccess \
else get_type_of_expression(expr.args[0])
pw = sp.Piecewise((-new_arg, new_arg < CastFunc(0, base_type.numpy_dtype)),
(new_arg, True))
return visit_expr(pw, default_type, force_vectorize)
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
if expr.func is sp.Mul and expr.args[0] == -1:
# special treatment for the unary minus: make sure that the -1 has the same type as the argument
dtype = int
for arg in expr.atoms(VectorMemoryAccess):
if arg.dtype.base_type.is_float():
dtype = arg.dtype.base_type.numpy_dtype.type
for arg in expr.atoms(TypedSymbol):
if type(arg.dtype) is VectorType and arg.dtype.base_type.is_float():
dtype = arg.dtype.base_type.numpy_dtype.type
if dtype is not int:
if dtype is np.float32:
default_type = 'float'
expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
new_args = [visit_expr(a, default_type, force_vectorize) for a in expr.args]
arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
if not any(type(t) is VectorType for t in arg_types): if not any(type(t) is VectorType for t in arg_types):
return expr return expr
else: else:
target_type = collate_types(arg_types) target_type = collate_types(arg_types)
casted_args = [cast_func(a, target_type) if t != target_type else a casted_args = [
for a, t in zip(new_args, arg_types)] CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
for a, t in zip(new_args, arg_types)]
return expr.func(*casted_args) return expr.func(*casted_args)
elif expr.func is sp.UnevaluatedExpr:
assert expr.args[0].is_Pow or expr.args[0].is_Mul, "UnevaluatedExpr only implemented holding Mul or Pow"
# TODO this is only because cut_loop evaluates the multiplications again due to deepcopy. All this should
# TODO be fixed for real at some point.
if expr.args[0].is_Pow:
base = expr.args[0].base
exp = expr.args[0].exp
expr = sp.UnevaluatedExpr(sp.Mul(*([base] * +exp), evaluate=False))
new_args = [visit_expr(a, default_type, force_vectorize) for a in expr.args[0].args]
arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
target_type = collate_types(arg_types)
if not any(type(t) is VectorType for t in arg_types):
target_type = VectorType(target_type, instruction_set['width'])
casted_args = [
CastFunc(a, target_type) if t != target_type and not isinstance(a, VectorMemoryAccess) else a
for a, t in zip(new_args, arg_types)]
return expr.func(expr.args[0].func(*casted_args, evaluate=False))
elif expr.func is sp.Pow: elif expr.func is sp.Pow:
new_arg = visit_expr(expr.args[0]) new_arg = visit_expr(expr.args[0], default_type, force_vectorize)
return expr.func(new_arg, expr.args[1]) return expr.func(new_arg, expr.args[1])
elif expr.func == sp.Piecewise: elif expr.func == sp.Piecewise:
new_results = [visit_expr(a[0]) for a in expr.args] new_results = [visit_expr(a[0], default_type, force_vectorize) for a in expr.args]
new_conditions = [visit_expr(a[1]) for a in expr.args] new_conditions = [visit_expr(a[1], default_type, force_vectorize) for a in expr.args]
types_of_results = [get_type_of_expression(a) for a in new_results] types_of_results = [get_type_of_expression(a) for a in new_results]
types_of_conditions = [get_type_of_expression(a) for a in new_conditions] types_of_conditions = [get_type_of_expression(a) for a in new_conditions]
...@@ -167,43 +365,61 @@ def insert_vector_casts(ast_node): ...@@ -167,43 +365,61 @@ def insert_vector_casts(ast_node):
if type(condition_target_type) is not VectorType and type(result_target_type) is VectorType: if type(condition_target_type) is not VectorType and type(result_target_type) is VectorType:
condition_target_type = VectorType(condition_target_type, width=result_target_type.width) condition_target_type = VectorType(condition_target_type, width=result_target_type.width)
casted_results = [cast_func(a, result_target_type) if t != result_target_type else a casted_results = [CastFunc(a, result_target_type) if t != result_target_type else a
for a, t in zip(new_results, types_of_results)] for a, t in zip(new_results, types_of_results)]
casted_conditions = [cast_func(a, condition_target_type) casted_conditions = [CastFunc(a, condition_target_type)
if t != condition_target_type and a is not True else a if t != condition_target_type and a is not True else a
for a, t in zip(new_conditions, types_of_conditions)] for a, t in zip(new_conditions, types_of_conditions)]
return sp.Piecewise(*[(r, c) for r, c in zip(casted_results, casted_conditions)]) return sp.Piecewise(*[(r, c) for r, c in zip(casted_results, casted_conditions)])
else: elif isinstance(expr, TypedSymbol):
if force_vectorize:
expr_type = get_type_of_expression(expr)
if type(expr_type) is not VectorType:
vector_type = VectorType(expr_type, instruction_set['width'])
return CastFunc(expr, vector_type)
return expr return expr
elif isinstance(expr, (sp.Number, BooleanAtom)):
return expr
else:
raise NotImplementedError(f'Due to defensive programming we handle only specific expressions.\n'
f'The expression {expr} of type {type(expr)} is not known yet.')
def visit_node(node, substitution_dict): def visit_node(node, substitution_dict, default_type='double'):
substitution_dict = substitution_dict.copy() substitution_dict = substitution_dict.copy()
for arg in node.args: for arg in node.args:
if isinstance(arg, ast.SympyAssignment): if isinstance(arg, ast.SympyAssignment):
assignment = arg assignment = arg
# If there is a remainder loop we do not vectorise it, thus lhs will indicate this
# if isinstance(assignment.lhs, ast.ResolvedFieldAccess):
# continue
subs_expr = fast_subs(assignment.rhs, substitution_dict, subs_expr = fast_subs(assignment.rhs, substitution_dict,
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
assignment.rhs = visit_expr(subs_expr)
rhs_type = get_type_of_expression(assignment.rhs) # If either side contains a vectorized subexpression, both sides
# must be fully vectorized.
lhs_scalar = is_scalar(assignment.lhs)
rhs_scalar = is_scalar(subs_expr)
assignment.rhs = visit_expr(subs_expr, default_type, force_vectorize=not (lhs_scalar and rhs_scalar))
if isinstance(assignment.lhs, TypedSymbol): if isinstance(assignment.lhs, TypedSymbol):
lhs_type = assignment.lhs.dtype if lhs_scalar and not rhs_scalar:
if type(rhs_type) is VectorType and type(lhs_type) is not VectorType: lhs_type = get_type_of_expression(assignment.lhs)
rhs_type = get_type_of_expression(assignment.rhs)
new_lhs_type = VectorType(lhs_type, rhs_type.width) new_lhs_type = VectorType(lhs_type, rhs_type.width)
new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type) new_lhs = TypedSymbol(assignment.lhs.name, new_lhs_type)
substitution_dict[assignment.lhs] = new_lhs substitution_dict[assignment.lhs] = new_lhs
assignment.lhs = new_lhs assignment.lhs = new_lhs
elif isinstance(assignment.lhs.func, cast_func): elif isinstance(assignment.lhs, VectorMemoryAccess):
lhs_type = assignment.lhs.args[1] assignment.lhs = visit_expr(assignment.lhs, default_type)
if type(lhs_type) is VectorType and type(rhs_type) is not VectorType:
assignment.rhs = cast_func(assignment.rhs, lhs_type)
elif isinstance(arg, ast.Conditional): elif isinstance(arg, ast.Conditional):
arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict, arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict,
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
arg.condition_expr = visit_expr(arg.condition_expr) arg.condition_expr = visit_expr(arg.condition_expr, default_type)
visit_node(arg, substitution_dict) visit_node(arg, substitution_dict, default_type)
else: else:
visit_node(arg, substitution_dict) visit_node(arg, substitution_dict, default_type)
visit_node(ast_node, {}) visit_node(ast_node, {}, default_float_type)
import warnings
from typing import Tuple, Union from typing import Tuple, Union
from .datahandling_interface import DataHandling from .datahandling_interface import DataHandling
from ..enums import Target
from .serial_datahandling import SerialDataHandling from .serial_datahandling import SerialDataHandling
try: try:
...@@ -18,9 +21,10 @@ except ImportError: ...@@ -18,9 +21,10 @@ except ImportError:
def create_data_handling(domain_size: Tuple[int, ...], def create_data_handling(domain_size: Tuple[int, ...],
periodicity: Union[bool, Tuple[bool, ...]] = False, periodicity: Union[bool, Tuple[bool, ...]] = False,
default_layout: str = 'SoA', default_layout: str = 'SoA',
default_target: str = 'cpu', default_target: Target = Target.CPU,
parallel: bool = False, parallel: bool = False,
default_ghost_layers: int = 1) -> DataHandling: default_ghost_layers: int = 1,
device_number: Union[int, None] = None) -> DataHandling:
"""Creates a data handling instance. """Creates a data handling instance.
Args: Args:
...@@ -28,10 +32,19 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -28,10 +32,19 @@ def create_data_handling(domain_size: Tuple[int, ...],
periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity
for each coordinate for each coordinate
default_layout: default array layout, that is used if not explicitly specified in 'add_array' default_layout: default array layout, that is used if not explicitly specified in 'add_array'
default_target: either 'cpu' or 'gpu' default_target: `Target`
parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
default_ghost_layers: default number of ghost layers if not overwritten in 'add_array' default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
device_number: If `default_target` is set to 'GPU' and `parallel` is False, a device number should be
specified. If none is given, the device with the largest amount of memory is used. If multiple
devices have the same amount of memory, the one with the lower number is used
""" """
if isinstance(default_target, str):
new_target = Target[default_target.upper()]
warnings.warn(f'Target "{default_target}" as str is deprecated. Use {new_target} instead',
category=DeprecationWarning)
default_target = new_target
if parallel: if parallel:
if wlb is None: if wlb is None:
raise ValueError("Cannot create parallel data handling because walberla module is not available") raise ValueError("Cannot create parallel data handling because walberla module is not available")
...@@ -56,8 +69,12 @@ def create_data_handling(domain_size: Tuple[int, ...], ...@@ -56,8 +69,12 @@ def create_data_handling(domain_size: Tuple[int, ...],
return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target, return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
default_layout=default_layout, default_ghost_layers=default_ghost_layers) default_layout=default_layout, default_ghost_layers=default_ghost_layers)
else: else:
return SerialDataHandling(domain_size, periodicity=periodicity, default_target=default_target, return SerialDataHandling(domain_size,
default_layout=default_layout, default_ghost_layers=default_ghost_layers) periodicity=periodicity,
default_target=default_target,
default_layout=default_layout,
default_ghost_layers=default_ghost_layers,
device_number=device_number)
__all__ = ['create_data_handling'] __all__ = ['create_data_handling']
...@@ -111,15 +111,15 @@ class ParallelBlock(Block): ...@@ -111,15 +111,15 @@ class ParallelBlock(Block):
def __getitem__(self, data_name): def __getitem__(self, data_name):
result = self._block[self._name_prefix + data_name] result = self._block[self._name_prefix + data_name]
type_name = type(result).__name__ type_name = type(result).__name__
if type_name == 'GhostLayerField': if 'GhostLayerField' in type_name:
result = wlb.field.toArray(result, withGhostLayers=self._gls) result = wlb.field.toArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result) result = self._normalize_array_shape(result)
elif type_name == 'GpuField': elif 'GpuField' in type_name:
result = wlb.cuda.toGpuArray(result, withGhostLayers=self._gls) result = wlb.gpu.toGpuArray(result, with_ghost_layers=self._gls)
result = self._normalize_array_shape(result) result = self._normalize_array_shape(result)
return result return result
def _normalize_array_shape(self, arr): def _normalize_array_shape(self, arr):
if arr.shape[-1] == 1: if arr.shape[-1] == 1 and len(arr.shape) == 4:
arr = arr[..., 0] arr = arr[..., 0]
return arr[self._localSlice] return arr[self._localSlice]
...@@ -3,7 +3,8 @@ from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union ...@@ -3,7 +3,8 @@ from typing import Callable, Dict, Iterable, Optional, Sequence, Tuple, Union
import numpy as np import numpy as np
from pystencils.field import Field from pystencils.enums import Target, Backend
from pystencils.field import Field, FieldType
class DataHandling(ABC): class DataHandling(ABC):
...@@ -16,7 +17,14 @@ class DataHandling(ABC): ...@@ -16,7 +17,14 @@ class DataHandling(ABC):
'gather' function that has collects (parts of the) distributed data on a single process. 'gather' function that has collects (parts of the) distributed data on a single process.
""" """
_GPU_LIKE_TARGETS = [Target.GPU]
_GPU_LIKE_BACKENDS = [Backend.CUDA]
# ---------------------------- Adding and accessing data ----------------------------------------------------------- # ---------------------------- Adding and accessing data -----------------------------------------------------------
@property
@abstractmethod
def default_target(self) -> Target:
"""Target Enum indicating the target of the computation"""
@property @property
@abstractmethod @abstractmethod
...@@ -36,7 +44,7 @@ class DataHandling(ABC): ...@@ -36,7 +44,7 @@ class DataHandling(ABC):
@abstractmethod @abstractmethod
def add_array(self, name: str, values_per_cell, dtype=np.float64, def add_array(self, name: str, values_per_cell, dtype=np.float64,
latex_name: Optional[str] = None, ghost_layers: Optional[int] = None, layout: Optional[str] = None, latex_name: Optional[str] = None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
cpu: bool = True, gpu: Optional[bool] = None, alignment=False) -> Field: cpu: bool = True, gpu: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name. """Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary For each array a symbolic field is available via the 'fields' dictionary
...@@ -53,12 +61,63 @@ class DataHandling(ABC): ...@@ -53,12 +61,63 @@ class DataHandling(ABC):
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'. layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1 this is only important if values_per_cell > 1
cpu: allocate field on the CPU cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu' gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'GPU'
alignment: either False for no alignment, or the number of bytes to align to alignment: either False for no alignment, or the number of bytes to align to
Returns: Returns:
pystencils field, that can be used to formulate symbolic kernels pystencils field, that can be used to formulate symbolic kernels
""" """
def add_arrays(self,
               description: str,
               dtype=np.float64,
               ghost_layers: Optional[int] = None,
               layout: Optional[str] = None,
               cpu: bool = True,
               gpu: Optional[bool] = None,
               alignment=False,
               field_type=FieldType.GENERIC) -> Tuple[Field, ...]:
    """Adds multiple arrays using a string description similar to :func:`pystencils.fields`

    Every field named in ``description`` is registered through :meth:`add_array`, all of them
    sharing the remaining keyword arguments.

    >>> from pystencils.datahandling import create_data_handling
    >>> dh = create_data_handling((20, 30))
    >>> x, y = dh.add_arrays('x, y(9)')
    >>> print(dh.fields)
    {'x': x: double[22,32], 'y': y(9): double[22,32]}
    >>> assert x == dh.fields['x']
    >>> assert dh.fields['x'].shape == (22, 32)
    >>> assert dh.fields['y'].index_shape == (9,)

    Args:
        description (str): String description of the fields to add
        dtype: data type of the array as numpy data type
        ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
                     is used
        layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
                this is only important if values_per_cell > 1
        cpu: allocate field on the CPU
        gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'GPU'
        alignment: either False for no alignment, or the number of bytes to align to
        field_type: field type forwarded unchanged to :meth:`add_array` for every created field

    Returns:
        Tuple of the fields representing the just created arrays, in the order of the description
    """
    # Imported locally, exactly as in the original implementation.
    from pystencils.field import _parse_part1

    names = []
    for name, indices in _parse_part1(description):
        names.append(name)
        self.add_array(name,
                       values_per_cell=indices,
                       dtype=dtype,
                       ghost_layers=ghost_layers,
                       layout=layout,
                       cpu=cpu,
                       gpu=gpu,
                       alignment=alignment,
                       field_type=field_type)

    # Materialise a real tuple: the former generator expression contradicted the
    # annotated return type and could be consumed only once (no len(), no indexing).
    return tuple(self.fields[n] for n in names)
@abstractmethod @abstractmethod
def has_data(self, name): def has_data(self, name):
"""Returns true if a field or custom data element with this name was added.""" """Returns true if a field or custom data element with this name was added."""
...@@ -153,6 +212,10 @@ class DataHandling(ABC): ...@@ -153,6 +212,10 @@ class DataHandling(ABC):
directly passed to the kernel function and override possible parameters from the DataHandling directly passed to the kernel function and override possible parameters from the DataHandling
""" """
@abstractmethod
def get_kernel_kwargs(self, kernel_function, **kwargs):
    """Returns the input arguments of a kernel

    Args:
        kernel_function: the kernel whose input arguments are requested
        **kwargs: additional keyword arguments supplied by the caller
                  NOTE(review): presumably these are merged into (and take precedence over) the
                  arguments provided by the data handling - confirm against the implementations
    """
@abstractmethod @abstractmethod
def swap(self, name1, name2, gpu=False): def swap(self, name1, name2, gpu=False):
"""Swaps data of two arrays""" """Swaps data of two arrays"""
...@@ -222,7 +285,7 @@ class DataHandling(ABC): ...@@ -222,7 +285,7 @@ class DataHandling(ABC):
names: what data to synchronize: name of array or sequence of names names: what data to synchronize: name of array or sequence of names
stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19' stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
if None, a full synchronization (i.e. D2Q9 or D3Q27) is done if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
target: either 'cpu' or 'gpu target: `Target` either 'CPU' or 'GPU'
kwargs: implementation specific, optional optimization parameters for communication kwargs: implementation specific, optional optimization parameters for communication
Returns: Returns:
...@@ -268,6 +331,7 @@ class DataHandling(ABC): ...@@ -268,6 +331,7 @@ class DataHandling(ABC):
b[array_name][(Ellipsis, *value_idx)].fill(val) b[array_name][(Ellipsis, *value_idx)].fill(val)
else: else:
b[array_name].fill(val) b[array_name].fill(val)
self.to_gpu(array_name)
def min(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True): def min(self, array_name, slice_obj=None, ghost_layers=False, inner_ghost_layers=False, reduce=True):
"""Returns the minimum value inside the domain or slice of the domain. """Returns the minimum value inside the domain or slice of the domain.
......
...@@ -7,16 +7,18 @@ import waLBerla as wlb ...@@ -7,16 +7,18 @@ import waLBerla as wlb
from pystencils.datahandling.blockiteration import block_iteration, sliced_block_iteration from pystencils.datahandling.blockiteration import block_iteration, sliced_block_iteration
from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.field import Field from pystencils.enums import Backend
from pystencils.kernelparameters import FieldPointerSymbol from pystencils.field import Field, FieldType
from pystencils.typing.typed_sympy import FieldPointerSymbol
from pystencils.utils import DotDict from pystencils.utils import DotDict
from pystencils import Target
class ParallelDataHandling(DataHandling): class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX = "gpu_" GPU_DATA_PREFIX = "gpu_"
VTK_COUNTER = 0 VTK_COUNTER = 0
def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target='cpu'): def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target=Target.CPU):
""" """
Creates data handling based on walberla block storage Creates data handling based on walberla block storage
...@@ -27,18 +29,19 @@ class ParallelDataHandling(DataHandling): ...@@ -27,18 +29,19 @@ class ParallelDataHandling(DataHandling):
dim: dimension of scenario, dim: dimension of scenario,
walberla always uses three dimensions, so if dim=2 the extend of the walberla always uses three dimensions, so if dim=2 the extend of the
z coordinate of blocks has to be 1 z coordinate of blocks has to be 1
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated default_target: `Target`, either 'CPU' or 'GPU' . If set to 'GPU' for each array also a GPU version is
if not overwritten in add_array, and synchronization functions are for the GPU by default allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default
""" """
super(ParallelDataHandling, self).__init__() super(ParallelDataHandling, self).__init__()
assert dim in (2, 3) assert dim in (2, 3)
self.blocks = blocks self._blocks = blocks
self.default_ghost_layers = default_ghost_layers self._default_ghost_layers = default_ghost_layers
self.default_layout = default_layout self._default_layout = default_layout
self._fields = DotDict() # maps name to symbolic pystencils field self._fields = DotDict() # maps name to symbolic pystencils field
self._field_name_to_cpu_data_name = {} self._field_name_to_cpu_data_name = {}
self._field_name_to_gpu_data_name = {} self._field_name_to_gpu_data_name = {}
self.data_names = set() self._data_names = set()
self._dim = dim self._dim = dim
self._fieldInformation = {} self._fieldInformation = {}
self._cpu_gpu_pairs = [] self._cpu_gpu_pairs = []
...@@ -52,7 +55,11 @@ class ParallelDataHandling(DataHandling): ...@@ -52,7 +55,11 @@ class ParallelDataHandling(DataHandling):
if self._dim == 2: if self._dim == 2:
assert self.blocks.getDomainCellBB().size[2] == 1 assert self.blocks.getDomainCellBB().size[2] == 1
self.default_target = default_target self._default_target = default_target
@property
def default_target(self):
    """Read-only access to the default target stored in ``_default_target``."""
    return self._default_target
@property @property
def dim(self): def dim(self):
...@@ -70,6 +77,22 @@ class ParallelDataHandling(DataHandling): ...@@ -70,6 +77,22 @@ class ParallelDataHandling(DataHandling):
def fields(self): def fields(self):
return self._fields return self._fields
@property
def blocks(self):
    """Read-only access to the block storage stored in ``_blocks``."""
    return self._blocks
@property
def default_ghost_layers(self):
    """Read-only access to the default ghost layer count stored in ``_default_ghost_layers``."""
    return self._default_ghost_layers
@property
def default_layout(self):
    """Read-only access to the default memory layout stored in ``_default_layout``."""
    return self._default_layout
@property
def data_names(self):
    """Read-only access to the set of data names kept in ``_data_names``.

    The backing attribute is the private ``_data_names`` set created in the constructor.
    Returning ``self.data_names`` here would re-enter this very property and recurse
    until a ``RecursionError`` is raised.
    """
    return self._data_names
def ghost_layers_of_field(self, name): def ghost_layers_of_field(self, name):
return self._fieldInformation[name]['ghost_layers'] return self._fieldInformation[name]['ghost_layers']
...@@ -90,18 +113,18 @@ class ParallelDataHandling(DataHandling): ...@@ -90,18 +113,18 @@ class ParallelDataHandling(DataHandling):
self._custom_data_names.append(name) self._custom_data_names.append(name)
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None,
layout=None, cpu=True, gpu=None, alignment=False): layout=None, cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
if ghost_layers is None: if ghost_layers is None:
ghost_layers = self.default_ghost_layers ghost_layers = self.default_ghost_layers
if gpu is None: if gpu is None:
gpu = self.default_target == 'gpu' gpu = self.default_target == Target.GPU
if layout is None: if layout is None:
layout = self.default_layout layout = self.default_layout
if len(self.blocks) == 0: if len(self.blocks) == 0:
raise ValueError("Data handling expects that each process has at least one block") raise ValueError("Data handling expects that each process has at least one block")
if hasattr(dtype, 'type'): if hasattr(dtype, 'type'):
dtype = dtype.type dtype = dtype.type
if name in self.blocks[0] or self.GPU_DATA_PREFIX + name in self.blocks[0]: if name in self.blocks[0].fieldNames or self.GPU_DATA_PREFIX + name in self.blocks[0].fieldNames:
raise ValueError("Data with this name has already been added") raise ValueError("Data with this name has already been added")
if alignment is False or alignment is None: if alignment is False or alignment is None:
...@@ -109,11 +132,14 @@ class ParallelDataHandling(DataHandling): ...@@ -109,11 +132,14 @@ class ParallelDataHandling(DataHandling):
if hasattr(values_per_cell, '__len__'): if hasattr(values_per_cell, '__len__'):
raise NotImplementedError("Parallel data handling does not support multiple index dimensions") raise NotImplementedError("Parallel data handling does not support multiple index dimensions")
self._fieldInformation[name] = {'ghost_layers': ghost_layers, self._fieldInformation[name] = {
'values_per_cell': values_per_cell, 'ghost_layers': ghost_layers,
'layout': layout, 'values_per_cell': values_per_cell,
'dtype': dtype, 'layout': layout,
'alignment': alignment} 'dtype': dtype,
'alignment': alignment,
'field_type': field_type,
}
layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf, layout_map = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
'f': wlb.field.Layout.fzyx, 'f': wlb.field.Layout.fzyx,
...@@ -125,8 +151,8 @@ class ParallelDataHandling(DataHandling): ...@@ -125,8 +151,8 @@ class ParallelDataHandling(DataHandling):
if gpu: if gpu:
if alignment != 0: if alignment != 0:
raise ValueError("Alignment for walberla GPU fields not yet supported") raise ValueError("Alignment for walberla GPU fields not yet supported")
wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell, wlb.gpu.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell,
usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout]) usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout])
if cpu and gpu: if cpu and gpu:
self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name)) self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name))
...@@ -140,7 +166,8 @@ class ParallelDataHandling(DataHandling): ...@@ -140,7 +166,8 @@ class ParallelDataHandling(DataHandling):
assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists" assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.create_generic(name, self.dim, dtype, index_dimensions, layout, self.fields[name] = Field.create_generic(name, self.dim, dtype, index_dimensions, layout,
index_shape=(values_per_cell,) if index_dimensions > 0 else None) index_shape=(values_per_cell,) if index_dimensions > 0 else None,
field_type=field_type)
self.fields[name].latex_name = latex_name self.fields[name].latex_name = latex_name
self._field_name_to_cpu_data_name[name] = name self._field_name_to_cpu_data_name[name] = name
if gpu: if gpu:
...@@ -211,15 +238,13 @@ class ParallelDataHandling(DataHandling): ...@@ -211,15 +238,13 @@ class ParallelDataHandling(DataHandling):
array = array[:, :, 0] array = array[:, :, 0]
if last_element and self.fields[name].index_dimensions > 0: if last_element and self.fields[name].index_dimensions > 0:
array = array[..., last_element[0]] array = array[..., last_element[0]]
if self.fields[name].index_dimensions == 0:
array = array[..., 0]
return array return array
def _normalize_arr_shape(self, arr, index_dimensions): def _normalize_arr_shape(self, arr, index_dimensions):
if index_dimensions == 0: if index_dimensions == 0 and len(arr.shape) > 3:
arr = arr[..., 0] arr = arr[..., 0]
if self.dim == 2: if self.dim == 2 and len(arr.shape) > 2:
arr = arr[:, :, 0] arr = arr[:, :, 0]
return arr return arr
...@@ -228,9 +253,9 @@ class ParallelDataHandling(DataHandling): ...@@ -228,9 +253,9 @@ class ParallelDataHandling(DataHandling):
kernel_function(**arg_dict) kernel_function(**arg_dict)
def get_kernel_kwargs(self, kernel_function, **kwargs): def get_kernel_kwargs(self, kernel_function, **kwargs):
if kernel_function.ast.backend == 'gpucuda': if kernel_function.ast.backend == Backend.CUDA:
name_map = self._field_name_to_gpu_data_name name_map = self._field_name_to_gpu_data_name
to_array = wlb.cuda.toGpuArray to_array = wlb.gpu.toGpuArray
else: else:
name_map = self._field_name_to_cpu_data_name name_map = self._field_name_to_cpu_data_name
to_array = wlb.field.toArray to_array = wlb.field.toArray
...@@ -242,7 +267,7 @@ class ParallelDataHandling(DataHandling): ...@@ -242,7 +267,7 @@ class ParallelDataHandling(DataHandling):
for block in self.blocks: for block in self.blocks:
field_args = {} field_args = {}
for data_name, f in data_used_in_kernel: for data_name, f in data_used_in_kernel:
arr = to_array(block[data_name], withGhostLayers=[True, True, self.dim == 3]) arr = to_array(block[data_name], with_ghost_layers=[True, True, self.dim == 3])
arr = self._normalize_arr_shape(arr, f.index_dimensions) arr = self._normalize_arr_shape(arr, f.index_dimensions)
field_args[f.name] = arr field_args[f.name] = arr
field_args.update(kwargs) field_args.update(kwargs)
...@@ -255,7 +280,8 @@ class ParallelDataHandling(DataHandling): ...@@ -255,7 +280,8 @@ class ParallelDataHandling(DataHandling):
for block in self.blocks: for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name]) transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else: else:
wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name) if self.is_on_gpu(name):
wlb.gpu.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def to_gpu(self, name): def to_gpu(self, name):
if name in self._custom_data_transfer_functions: if name in self._custom_data_transfer_functions:
...@@ -263,28 +289,29 @@ class ParallelDataHandling(DataHandling): ...@@ -263,28 +289,29 @@ class ParallelDataHandling(DataHandling):
for block in self.blocks: for block in self.blocks:
transfer_func(block[self.GPU_DATA_PREFIX + name], block[name]) transfer_func(block[self.GPU_DATA_PREFIX + name], block[name])
else: else:
wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name) if self.is_on_gpu(name):
wlb.gpu.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def is_on_gpu(self, name): def is_on_gpu(self, name):
return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs
def all_to_cpu(self): def all_to_cpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs: for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.cuda.copyFieldToCpu(self.blocks, gpu_name, cpu_name) wlb.gpu.copyFieldToCpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys(): for name in self._custom_data_transfer_functions.keys():
self.to_cpu(name) self.to_cpu(name)
def all_to_gpu(self): def all_to_gpu(self):
for cpu_name, gpu_name in self._cpu_gpu_pairs: for cpu_name, gpu_name in self._cpu_gpu_pairs:
wlb.cuda.copyFieldToGpu(self.blocks, gpu_name, cpu_name) wlb.gpu.copyFieldToGpu(self.blocks, gpu_name, cpu_name)
for name in self._custom_data_transfer_functions.keys(): for name in self._custom_data_transfer_functions.keys():
self.to_gpu(name) self.to_gpu(name)
def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_): def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'cpu', buffered, stencil_restricted) return self.synchronization_function(names, stencil, Target.CPU, buffered, stencil_restricted)
def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_): def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
return self.synchronization_function(names, stencil, 'gpu', buffered, stencil_restricted) return self.synchronization_function(names, stencil, Target.GPU, buffered, stencil_restricted)
def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False): def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False):
if target is None: if target is None:
...@@ -297,13 +324,13 @@ class ParallelDataHandling(DataHandling): ...@@ -297,13 +324,13 @@ class ParallelDataHandling(DataHandling):
names = [names] names = [names]
create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
if target == 'cpu': if target == Target.CPU:
create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
if not buffered and stencil_restricted: if buffered and stencil_restricted:
create_packing = wlb.field.createStencilRestrictedPackInfo create_packing = wlb.field.createStencilRestrictedPackInfo
else: else:
assert target == 'gpu' assert target == Target.GPU
create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo create_packing = wlb.gpu.createPackInfo if buffered else wlb.gpu.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names] names = [self.GPU_DATA_PREFIX + name for name in names]
sync_function = create_scheme(self.blocks, stencil) sync_function = create_scheme(self.blocks, stencil)
...@@ -379,7 +406,7 @@ class ParallelDataHandling(DataHandling): ...@@ -379,7 +406,7 @@ class ParallelDataHandling(DataHandling):
if not os.path.exists(directory): if not os.path.exists(directory):
os.mkdir(directory) os.mkdir(directory)
if os.path.isfile(directory): if os.path.isfile(directory):
raise RuntimeError("Trying to save to {}, but file exists already".format(directory)) raise RuntimeError(f"Trying to save to {directory}, but file exists already")
for field_name, data_name in self._field_name_to_cpu_data_name.items(): for field_name, data_name in self._field_name_to_cpu_data_name.items():
self.blocks.writeBlockData(data_name, os.path.join(directory, field_name + ".dat")) self.blocks.writeBlockData(data_name, os.path.join(directory, field_name + ".dat"))
......
...@@ -6,22 +6,24 @@ import numpy as np ...@@ -6,22 +6,24 @@ import numpy as np
from pystencils.datahandling.blockiteration import SerialBlock from pystencils.datahandling.blockiteration import SerialBlock
from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.datahandling.datahandling_interface import DataHandling
from pystencils.field import ( from pystencils.enums import Target
Field, create_numpy_array_with_layout, layout_string_to_tuple, spatial_layout_string_to_tuple) from pystencils.field import (Field, FieldType, create_numpy_array_with_layout,
layout_string_to_tuple, spatial_layout_string_to_tuple)
from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
from pystencils.slicing import normalize_slice, remove_ghost_layers from pystencils.slicing import normalize_slice, remove_ghost_layers
from pystencils.utils import DotDict from pystencils.utils import DotDict
try:
import pycuda.gpuarray as gpuarray
import pycuda.autoinit # NOQA
except ImportError:
gpuarray = None
class SerialDataHandling(DataHandling): class SerialDataHandling(DataHandling):
def __init__(self, domain_size: Sequence[int], default_ghost_layers: int = 1, default_layout: str = 'SoA', def __init__(self,
periodicity: Union[bool, Sequence[bool]] = False, default_target: str = 'cpu') -> None: domain_size: Sequence[int],
default_ghost_layers: int = 1,
default_layout: str = 'SoA',
periodicity: Union[bool, Sequence[bool]] = False,
default_target: Target = Target.CPU,
array_handler=None,
device_number=None) -> None:
""" """
Creates a data handling for single node simulations. Creates a data handling for single node simulations.
...@@ -29,8 +31,17 @@ class SerialDataHandling(DataHandling): ...@@ -29,8 +31,17 @@ class SerialDataHandling(DataHandling):
domain_size: size of the spatial domain as tuple domain_size: size of the spatial domain as tuple
default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method
default_layout: default layout used, if not overridden in add_array() method default_layout: default layout used, if not overridden in add_array() method
default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated periodicity: List of booleans that indicate which dimensions have periodic boundary conditions.
if not overwritten in add_array, and synchronization functions are for the GPU by default Alternatively, a single boolean can be given, which is used for all dimensions. Defaults to
False (non-periodic)
default_target: `Target` either 'CPU' or 'GPU'. If set to 'GPU' for each array also a GPU version is
allocated if not overwritten in add_array, and synchronization functions are for the GPU by
default
array_handler: An object that provides the same interface as `GPUArrayHandler`, which is used for creation
and transferring of GPU arrays. Default is to construct a fresh `GPUArrayHandler`
device_number: If `default_target` is set to 'GPU', a device number should be specified. If none is given,
the device with the largest amount of memory is used. If multiple devices have the same
amount of memory, the one with the lower number is used
""" """
super(SerialDataHandling, self).__init__() super(SerialDataHandling, self).__init__()
self._domainSize = tuple(domain_size) self._domainSize = tuple(domain_size)
...@@ -43,6 +54,19 @@ class SerialDataHandling(DataHandling): ...@@ -43,6 +54,19 @@ class SerialDataHandling(DataHandling):
self.custom_data_gpu = DotDict() self.custom_data_gpu = DotDict()
self._custom_data_transfer_functions = {} self._custom_data_transfer_functions = {}
if not array_handler:
try:
if device_number is None:
import cupy.cuda.runtime
if cupy.cuda.runtime.getDeviceCount() > 0:
device_number = sorted(range(cupy.cuda.runtime.getDeviceCount()),
key=lambda i: cupy.cuda.Device(i).mem_info[1], reverse=True)[0]
self.array_handler = GPUArrayHandler(device_number)
except ImportError:
self.array_handler = GPUNotAvailableHandler()
else:
self.array_handler = array_handler
if periodicity is None or periodicity is False: if periodicity is None or periodicity is False:
periodicity = [False] * self.dim periodicity = [False] * self.dim
if periodicity is True: if periodicity is True:
...@@ -50,9 +74,13 @@ class SerialDataHandling(DataHandling): ...@@ -50,9 +74,13 @@ class SerialDataHandling(DataHandling):
self._periodicity = periodicity self._periodicity = periodicity
self._field_information = {} self._field_information = {}
self.default_target = default_target self._default_target = default_target
self._start_time = time.perf_counter() self._start_time = time.perf_counter()
@property
def default_target(self):
    """Read-only access to the default target stored in ``_default_target``."""
    return self._default_target
@property @property
def dim(self): def dim(self):
return len(self._domainSize) return len(self._domainSize)
...@@ -76,13 +104,13 @@ class SerialDataHandling(DataHandling): ...@@ -76,13 +104,13 @@ class SerialDataHandling(DataHandling):
return self._field_information[name]['values_per_cell'] return self._field_information[name]['values_per_cell']
def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None, def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None,
cpu=True, gpu=None, alignment=False): cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
if ghost_layers is None: if ghost_layers is None:
ghost_layers = self.default_ghost_layers ghost_layers = self.default_ghost_layers
if layout is None: if layout is None:
layout = self.default_layout layout = self.default_layout
if gpu is None: if gpu is None:
gpu = self.default_target == 'gpu' gpu = self.default_target in self._GPU_LIKE_TARGETS
kwargs = { kwargs = {
'shape': tuple(s + 2 * ghost_layers for s in self._domainSize), 'shape': tuple(s + 2 * ghost_layers for s in self._domainSize),
...@@ -90,7 +118,7 @@ class SerialDataHandling(DataHandling): ...@@ -90,7 +118,7 @@ class SerialDataHandling(DataHandling):
} }
if not hasattr(values_per_cell, '__len__'): if not hasattr(values_per_cell, '__len__'):
values_per_cell = (values_per_cell, ) values_per_cell = (values_per_cell,)
if len(values_per_cell) == 1 and values_per_cell[0] == 1: if len(values_per_cell) == 1 and values_per_cell[0] == 1:
values_per_cell = () values_per_cell = ()
...@@ -100,6 +128,7 @@ class SerialDataHandling(DataHandling): ...@@ -100,6 +128,7 @@ class SerialDataHandling(DataHandling):
'layout': layout, 'layout': layout,
'dtype': dtype, 'dtype': dtype,
'alignment': alignment, 'alignment': alignment,
'field_type': field_type,
} }
index_dimensions = len(values_per_cell) index_dimensions = len(values_per_cell)
...@@ -110,10 +139,14 @@ class SerialDataHandling(DataHandling): ...@@ -110,10 +139,14 @@ class SerialDataHandling(DataHandling):
else: else:
layout_tuple = spatial_layout_string_to_tuple(layout, self.dim) layout_tuple = spatial_layout_string_to_tuple(layout, self.dim)
# cpu_arr is always created - since there is no create_pycuda_array_with_layout() # cpu_arr is always created - since there is no create_gpu_array_with_layout()
byte_offset = ghost_layers * np.dtype(dtype).itemsize byte_offset = ghost_layers * np.dtype(dtype).itemsize
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs) if gpu:
cpu_arr = self.array_handler.pinned_numpy_array(shape=kwargs['shape'], layout=layout_tuple, dtype=dtype)
else:
cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
byte_offset=byte_offset, **kwargs)
if alignment and gpu: if alignment and gpu:
raise NotImplementedError("Alignment for GPU fields not supported") raise NotImplementedError("Alignment for GPU fields not supported")
...@@ -125,10 +158,11 @@ class SerialDataHandling(DataHandling): ...@@ -125,10 +158,11 @@ class SerialDataHandling(DataHandling):
if gpu: if gpu:
if name in self.gpu_arrays: if name in self.gpu_arrays:
raise ValueError("GPU Field with this name already exists") raise ValueError("GPU Field with this name already exists")
self.gpu_arrays[name] = gpuarray.to_gpu(cpu_arr) self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists" assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions) self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
field_type=field_type)
self.fields[name].latex_name = latex_name self.fields[name].latex_name = latex_name
return self.fields[name] return self.fields[name]
...@@ -207,7 +241,7 @@ class SerialDataHandling(DataHandling): ...@@ -207,7 +241,7 @@ class SerialDataHandling(DataHandling):
def swap(self, name1, name2, gpu=None): def swap(self, name1, name2, gpu=None):
if gpu is None: if gpu is None:
gpu = self.default_target == "gpu" gpu = self.default_target in self._GPU_LIKE_TARGETS
arr = self.gpu_arrays if gpu else self.cpu_arrays arr = self.gpu_arrays if gpu else self.cpu_arrays
arr[name1], arr[name2] = arr[name2], arr[name1] arr[name1], arr[name2] = arr[name2], arr[name1]
...@@ -220,12 +254,12 @@ class SerialDataHandling(DataHandling): ...@@ -220,12 +254,12 @@ class SerialDataHandling(DataHandling):
self.to_gpu(name) self.to_gpu(name)
def run_kernel(self, kernel_function, **kwargs): def run_kernel(self, kernel_function, **kwargs):
arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays arrays = self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays
kernel_function(**arrays, **kwargs) kernel_function(**{**arrays, **kwargs})
def get_kernel_kwargs(self, kernel_function, **kwargs): def get_kernel_kwargs(self, kernel_function, **kwargs):
result = {} result = {}
result.update(self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays) result.update(self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays)
result.update(kwargs) result.update(kwargs)
return [result] return [result]
...@@ -234,28 +268,30 @@ class SerialDataHandling(DataHandling): ...@@ -234,28 +268,30 @@ class SerialDataHandling(DataHandling):
transfer_func = self._custom_data_transfer_functions[name][1] transfer_func = self._custom_data_transfer_functions[name][1]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name]) transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else: else:
self.gpu_arrays[name].get(self.cpu_arrays[name]) if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
def to_gpu(self, name): def to_gpu(self, name):
if name in self._custom_data_transfer_functions: if name in self._custom_data_transfer_functions:
transfer_func = self._custom_data_transfer_functions[name][0] transfer_func = self._custom_data_transfer_functions[name][0]
transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name]) transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
else: else:
self.gpu_arrays[name].set(self.cpu_arrays[name]) if name in self.cpu_arrays.keys() & self.gpu_arrays.keys():
self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
def is_on_gpu(self, name): def is_on_gpu(self, name):
return name in self.gpu_arrays return name in self.gpu_arrays
def synchronization_function_cpu(self, names, stencil_name=None, **_): def synchronization_function_cpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'cpu') return self.synchronization_function(names, stencil_name, target=Target.CPU)
def synchronization_function_gpu(self, names, stencil_name=None, **_): def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'gpu') return self.synchronization_function(names, stencil_name, target=Target.GPU)
def synchronization_function(self, names, stencil=None, target=None, **_): def synchronization_function(self, names, stencil=None, target=None, functor=None, **_):
if target is None: if target is None:
target = self.default_target target = self.default_target
assert target in ('cpu', 'gpu') assert target in (Target.CPU, Target.GPU)
if not hasattr(names, '__len__') or type(names) is str: if not hasattr(names, '__len__') or type(names) is str:
names = [names] names = [names]
...@@ -284,25 +320,28 @@ class SerialDataHandling(DataHandling): ...@@ -284,25 +320,28 @@ class SerialDataHandling(DataHandling):
gls = self._field_information[name]['ghost_layers'] gls = self._field_information[name]['ghost_layers']
values_per_cell = self._field_information[name]['values_per_cell'] values_per_cell = self._field_information[name]['values_per_cell']
if values_per_cell == (): if values_per_cell == ():
values_per_cell = (1, ) values_per_cell = (1,)
if len(values_per_cell) == 1: if len(values_per_cell) == 1:
values_per_cell = values_per_cell[0] values_per_cell = values_per_cell[0]
else:
raise NotImplementedError("Synchronization of this field is not supported: " + name)
if len(filtered_stencil) > 0: if len(filtered_stencil) > 0:
if target == 'cpu': if target == Target.CPU:
from pystencils.slicing import get_periodic_boundary_functor if functor is None:
result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls)) from pystencils.slicing import get_periodic_boundary_functor
functor = get_periodic_boundary_functor
result.append(functor(filtered_stencil, ghost_layers=gls))
else: else:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func if functor is None:
result.append(boundary_func(filtered_stencil, self._domainSize, from pystencils.gpu.periodicity import get_periodic_boundary_functor as functor
index_dimensions=self.fields[name].index_dimensions, target = Target.GPU
index_dim_shape=values_per_cell, result.append(functor(filtered_stencil, self._domainSize,
dtype=self.fields[name].dtype.numpy_dtype, index_dimensions=self.fields[name].index_dimensions,
ghost_layers=gls)) index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype,
if target == 'cpu': ghost_layers=gls,
target=target))
if target == Target.CPU:
def result_functor(): def result_functor():
for arr_name, func in zip(names, result): for arr_name, func in zip(names, result):
func(pdfs=self.cpu_arrays[arr_name]) func(pdfs=self.cpu_arrays[arr_name])
...@@ -353,6 +392,7 @@ class SerialDataHandling(DataHandling): ...@@ -353,6 +392,7 @@ class SerialDataHandling(DataHandling):
raise NotImplementedError("VTK export for fields with more than one index " raise NotImplementedError("VTK export for fields with more than one index "
"coordinate not implemented") "coordinate not implemented")
image_to_vtk(full_file_name, cell_data=cell_data) image_to_vtk(full_file_name, cell_data=cell_data)
return writer return writer
def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False): def create_vtk_writer_for_flag_array(self, file_name, data_name, masks_to_name, ghost_layers=False):
...@@ -384,7 +424,7 @@ class SerialDataHandling(DataHandling): ...@@ -384,7 +424,7 @@ class SerialDataHandling(DataHandling):
time_running = time.perf_counter() - self._start_time time_running = time.perf_counter() - self._start_time
spacing = 7 - len(str(int(time_running))) spacing = 7 - len(str(int(time_running)))
message = "[{: <8}]{}({:.3f} sec) {} ".format(level, spacing * '-', time_running, message) message = f"[{level: <8}]{spacing * '-'}({time_running:.3f} sec) {message} "
print(message, flush=True) print(message, flush=True)
def log_on_root(self, *args, level='INFO'): def log_on_root(self, *args, level='INFO'):
...@@ -398,18 +438,28 @@ class SerialDataHandling(DataHandling): ...@@ -398,18 +438,28 @@ class SerialDataHandling(DataHandling):
def world_rank(self): def world_rank(self):
return 0 return 0
def save_all(self, filename, compressed=True, synchronise_data=True):
    """Write all CPU arrays into a single ``.npz`` archive.

    Args:
        filename: destination passed on to ``np.savez`` / ``np.savez_compressed``
        compressed: if True the archive is written with ``np.savez_compressed``
        synchronise_data: if True, every array that exists on both the CPU and
            the GPU side is transferred to the CPU before writing
    """
    if synchronise_data:
        shared_names = self.cpu_arrays.keys() & self.gpu_arrays.keys()
        for shared_name in shared_names:
            self.to_cpu(shared_name)

    write_archive = np.savez_compressed if compressed else np.savez
    write_archive(filename, **self.cpu_arrays)
def load_all(self, filename, synchronise_data=True):
    """Read arrays from an ``.npz`` archive into the existing CPU arrays.

    Only arrays already registered in ``self.cpu_arrays`` are filled. An array
    is skipped, with a printed notice, when the archive has no entry of that
    name or when the stored shape differs from the existing array's shape.

    Args:
        filename: path of the archive. For ``str`` and path-like values the
            ``.npz`` suffix is appended when it is missing, which is the same
            rule ``np.savez`` applies when writing. Other objects, such as
            open binary files, are handed to ``np.load`` unchanged.
        synchronise_data: if True, every loaded array that also has a GPU
            counterpart is transferred to the GPU afterwards
    """
    import os

    if isinstance(filename, (str, os.PathLike)):
        filename = os.fspath(filename)
    # Test the suffix, not a substring: a directory called "run.npz_dir" must not suppress the extension.
    if isinstance(filename, str) and not filename.endswith('.npz'):
        filename += '.npz'

    # NpzFile keeps the archive open until closed; the context manager releases it deterministically.
    with np.load(filename) as file_contents:
        for arr_name, arr_contents in self.cpu_arrays.items():
            if arr_name not in file_contents:
                print(f"Skipping read data {arr_name} because there is no data with this name in data handling")
                continue
            # Every NpzFile item access re-reads the member from the archive, so read it once.
            loaded = file_contents[arr_name]
            if loaded.shape != arr_contents.shape:
                print(f"Skipping read data {arr_name} because shapes don't match. "
                      f"Read array shape {loaded.shape}, existing array shape {arr_contents.shape}")
                continue
            np.copyto(arr_contents, loaded)
            if synchronise_data and arr_name in self.gpu_arrays:
                self.to_gpu(arr_name)
from typing import Any, Dict, Optional from typing import Any, Dict, Optional, Union
import sympy as sp import sympy as sp
from pystencils.astnodes import KernelFunction from pystencils.astnodes import KernelFunction
from pystencils.enums import Backend
from pystencils.kernel_wrapper import KernelWrapper
def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True): def to_dot(expr: sp.Expr, graph_style: Optional[Dict[str, Any]] = None, short=True):
"""Show a sympy or pystencils AST as dot graph""" """Show a sympy or pystencils AST as dot graph"""
from pystencils.astnodes import Node from pystencils.astnodes import Node
import graphviz try:
import graphviz
except ImportError:
print("graphviz is not installed. Visualizing the AST is not available")
return
graph_style = {} if graph_style is None else graph_style graph_style = {} if graph_style is None else graph_style
if isinstance(expr, Node): if isinstance(expr, Node):
...@@ -29,18 +36,24 @@ def highlight_cpp(code: str): ...@@ -29,18 +36,24 @@ def highlight_cpp(code: str):
from pygments.lexers import CppLexer from pygments.lexers import CppLexer
css = HtmlFormatter().get_style_defs('.highlight') css = HtmlFormatter().get_style_defs('.highlight')
css_tag = "<style>{css}</style>".format(css=css) css_tag = f"<style>{css}</style>"
display(HTML(css_tag)) display(HTML(css_tag))
return HTML(highlight(code, CppLexer(), HtmlFormatter())) return HTML(highlight(code, CppLexer(), HtmlFormatter()))
def show_code(ast: KernelFunction, custom_backend=None): def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
"""Returns an object to display generated code (C/C++ or CUDA) """Returns an object to display generated code (C/C++ or CUDA)
Can either be displayed as HTML in Jupyter notebooks or printed as normal string. Can either be displayed as HTML in Jupyter notebooks or printed as normal string.
""" """
from pystencils.backends.cbackend import generate_c from pystencils.backends.cbackend import generate_c
dialect = 'cuda' if ast.backend == 'gpucuda' else 'c'
if isinstance(ast, KernelWrapper):
ast = ast.ast
if ast.backend not in {Backend.C, Backend.CUDA}:
raise NotImplementedError(f'get_code_obj is not implemented for backend {ast.backend}')
dialect = ast.backend
class CodeDisplay: class CodeDisplay:
def __init__(self, ast_input): def __init__(self, ast_input):
...@@ -55,3 +68,37 @@ def show_code(ast: KernelFunction, custom_backend=None): ...@@ -55,3 +68,37 @@ def show_code(ast: KernelFunction, custom_backend=None):
def __repr__(self): def __repr__(self):
return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend) return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)
return CodeDisplay(ast) return CodeDisplay(ast)
def get_code_str(ast, custom_backend=None):
    """Return the generated code for ``ast`` as a plain string.

    This is the ``str()`` of the display object built by ``get_code_obj``.
    """
    code_obj = get_code_obj(ast, custom_backend)
    return str(code_obj)
def _isnotebook():
    """Return True only when running inside a ZMQ-based IPython shell.

    That covers Jupyter notebook and qtconsole. Terminal IPython, any other
    shell type, and plain Python (where ``get_ipython`` is undefined) all
    yield False.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is only injected by IPython; plain interpreters lack it.
        return False
    return shell_name == 'ZMQInteractiveShell'
def show_code(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
    """Display the generated code for ``ast``.

    In a notebook the code object is rendered through IPython's ``display``.
    Otherwise it is printed with ``rich`` syntax highlighting, or with a plain
    ``print`` when ``rich`` cannot be imported.
    """
    code = get_code_obj(ast, custom_backend)

    if _isnotebook():
        from IPython.display import display
        display(code)
        return

    try:
        import rich.syntax
        import rich.console
        highlighted = rich.syntax.Syntax(str(code), "c++", theme="monokai", line_numbers=True)
        rich.console.Console().print(highlighted)
    except ImportError:
        print(code)
from enum import Enum, auto
class Target(Enum):
    """The Target enumeration represents all possible targets that can be used for the code generation."""

    CPU = auto()
    """Target CPU architecture."""

    GPU = auto()
    """Target GPU architecture."""
class Backend(Enum):
    """The Backend enumeration represents all possible backends that can be used for the code generation.

    Backends and targets must be combined with care. For example CPU as a target and CUDA as a backend
    makes no sense.
    """

    C = auto()
    """Use the C Backend of pystencils."""

    CUDA = auto()
    """Use the CUDA backend to generate code for NVIDIA GPUs."""
...@@ -4,20 +4,30 @@ import sympy as sp ...@@ -4,20 +4,30 @@ import sympy as sp
from pystencils.astnodes import Node from pystencils.astnodes import Node
from pystencils.simp import AssignmentCollection from pystencils.simp import AssignmentCollection
from pystencils.assignment import Assignment
# noinspection PyPep8Naming # noinspection PyPep8Naming
class fast_division(sp.Function):
    """Two-argument symbolic function that produces special float instructions for CUDA kernels."""

    nargs = (2,)
# noinspection PyPep8Naming # noinspection PyPep8Naming
class fast_sqrt(sp.Function):
    """One-argument symbolic function that produces special float instructions for CUDA kernels."""

    nargs = (1,)
# noinspection PyPep8Naming # noinspection PyPep8Naming
class fast_inv_sqrt(sp.Function):
    """One-argument symbolic function that produces special float instructions for CUDA kernels."""

    nargs = (1,)
...@@ -32,7 +42,7 @@ def _run(term, visitor): ...@@ -32,7 +42,7 @@ def _run(term, visitor):
return visitor(term) return visitor(term)
def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection]): def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
def visit(expr): def visit(expr):
if isinstance(expr, Node): if isinstance(expr, Node):
return expr return expr
...@@ -48,7 +58,7 @@ def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection]) ...@@ -48,7 +58,7 @@ def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection])
return _run(term, visit) return _run(term, visit)
def insert_fast_divisions(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection]): def insert_fast_divisions(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
def visit(expr): def visit(expr):
if isinstance(expr, Node): if isinstance(expr, Node):
......
...@@ -3,10 +3,11 @@ from .derivative import ( ...@@ -3,10 +3,11 @@ from .derivative import (
expand_diff_full, expand_diff_linear, expand_diff_products, functional_derivative, expand_diff_full, expand_diff_linear, expand_diff_products, functional_derivative,
normalize_diff_order, zero_diffs) normalize_diff_order, zero_diffs)
from .finitedifferences import Discretization2ndOrder, advection, diffusion, transient from .finitedifferences import Discretization2ndOrder, advection, diffusion, transient
from .finitevolumes import FVM1stOrder, VOF
from .spatial import discretize_spatial, discretize_spatial_staggered from .spatial import discretize_spatial, discretize_spatial_staggered
__all__ = ['Diff', 'diff', 'DiffOperator', 'diff_terms', 'collect_diffs', __all__ = ['Diff', 'diff', 'DiffOperator', 'diff_terms', 'collect_diffs',
'zero_diffs', 'evaluate_diffs', 'normalize_diff_order', 'expand_diff_full', 'expand_diff_linear', 'zero_diffs', 'evaluate_diffs', 'normalize_diff_order', 'expand_diff_full', 'expand_diff_linear',
'expand_diff_products', 'combine_diff_products', 'functional_derivative', 'expand_diff_products', 'combine_diff_products', 'functional_derivative',
'advection', 'diffusion', 'transient', 'Discretization2ndOrder', 'discretize_spatial', 'advection', 'diffusion', 'transient', 'Discretization2ndOrder', 'discretize_spatial',
'discretize_spatial_staggered'] 'discretize_spatial_staggered', 'FVM1stOrder', 'VOF']
import warnings import itertools
from collections import defaultdict from collections import defaultdict
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from pystencils.field import Field from pystencils.field import Field
from pystencils.stencil import direction_string_to_offset
from pystencils.sympyextensions import multidimensional_sum, prod from pystencils.sympyextensions import multidimensional_sum, prod
from pystencils.utils import LinearEquationSystem, fully_contains from pystencils.utils import LinearEquationSystem, fully_contains
...@@ -106,7 +107,7 @@ class FiniteDifferenceStencilDerivation: ...@@ -106,7 +107,7 @@ class FiniteDifferenceStencilDerivation:
@staticmethod
def symbolic_weight(*args):
    """Return the symbol ``w_(a,b,...)`` whose name lists the string form of all ``args``."""
    label = ",".join(str(arg) for arg in args)
    return sp.Symbol(f"w_({label})")
def error_term_dict(self, order): def error_term_dict(self, order):
error_terms = defaultdict(lambda: 0) error_terms = defaultdict(lambda: 0)
...@@ -125,7 +126,6 @@ class FiniteDifferenceStencilDerivation: ...@@ -125,7 +126,6 @@ class FiniteDifferenceStencilDerivation:
def isotropy_equations(self, order): def isotropy_equations(self, order):
def cycle_int_sequence(sequence, modulus): def cycle_int_sequence(sequence, modulus):
import numpy as np
result = [] result = []
arr = np.array(sequence, dtype=int) arr = np.array(sequence, dtype=int)
while True: while True:
...@@ -170,17 +170,8 @@ class FiniteDifferenceStencilDerivation: ...@@ -170,17 +170,8 @@ class FiniteDifferenceStencilDerivation:
f = field_access f = field_access
return sum(f.get_shifted(*offset) * weight for offset, weight in zip(self.stencil, self.weights)) return sum(f.get_shifted(*offset) * weight for offset, weight in zip(self.stencil, self.weights))
def __array__(self, dtype=None, copy=None):
    """NumPy conversion hook: return the stencil weights as a ``numpy.ndarray``.

    The weights come from ``as_array()``, converted via nested lists.

    Args:
        dtype: optional target dtype. NumPy passes it positionally when the
            caller requests one, e.g. ``np.asarray(obj, dtype=float)``. A
            signature without it makes such calls fail with a ``TypeError``.
        copy: accepted for compatibility with the NumPy 2 ``__array__``
            protocol. It is not used, because a fresh array is built on every
            call.
    """
    return np.array(self.as_array().tolist(), dtype=dtype)
def as_array(self): def as_array(self):
dim = len(self.stencil[0]) dim = len(self.stencil[0])
...@@ -203,12 +194,12 @@ class FiniteDifferenceStencilDerivation: ...@@ -203,12 +194,12 @@ class FiniteDifferenceStencilDerivation:
return result return result
def rotate_weights_and_apply(self, field_access: Field.Access, axis): def rotate_weights_and_apply(self, field_access: Field.Access, axes):
"""derive gradient weights of other direction with already calculated weights of one direction """derive gradient weights of other direction with already calculated weights of one direction
via rotation and apply them to a field.""" via rotation and apply them to a field."""
dim = len(self.stencil[0]) dim = len(self.stencil[0])
assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available" assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available"
rotated_weights = np.rot90(np.array(self.as_array()).reshape(self.as_array().shape), 1, axis) rotated_weights = np.rot90(np.array(self.__array__()), 1, axes)
result = [] result = []
max_offset = max(max(abs(e) for e in direction) for direction in self.stencil) max_offset = max(max(abs(e) for e in direction) for direction in self.stencil)
...@@ -228,3 +219,119 @@ class FiniteDifferenceStencilDerivation: ...@@ -228,3 +219,119 @@ class FiniteDifferenceStencilDerivation:
def __repr__(self):
    """Summarize the stencil by its accuracy and whether its error is isotropic."""
    return f"Finite difference stencil of accuracy {self.accuracy}, isotropic error: {self.is_isotropic}"
class FiniteDifferenceStaggeredStencilDerivation:
    """Derives a finite difference stencil for application at a staggered position

    Args:
        neighbor: the neighbor direction string or vector at whose staggered position to calculate the derivative
        dim: how many dimensions (2 or 3)
        derivative: a tuple of directions over which to perform derivatives
        free_weights_prefix: a string to prefix to free weight symbols. If None, do not return free weights
    """

    def __init__(self, neighbor, dim, derivative=tuple(), free_weights_prefix=None):
        if type(neighbor) is str:
            neighbor = direction_string_to_offset(neighbor)
        if dim == 2:
            # NOTE(review): presumably `neighbor` is a NumPy array here, so the comparison is
            # element-wise over the components beyond `dim` - confirm against callers.
            assert neighbor[dim:] == 0
        # An emptiness test instead of `derivative is tuple()`: the identity check depends on the
        # interpreter sharing one empty tuple and rejects other empty sequences.
        assert len(derivative) == 0 or max(derivative) < dim
        neighbor = sp.Matrix(neighbor[:dim])
        pos = neighbor / 2

        def unitvec(i):
            """return the `i`-th unit vector in `dim` dimensions"""
            a = np.zeros(dim, dtype=int)
            a[i] = 1
            return a

        def flipped(a, i):
            """return `a` with its `i`-th element's sign flipped"""
            a = a.copy()
            a[i] *= -1
            return a

        # determine the points to use, coordinates are relative to position
        points = []
        if np.linalg.norm(neighbor, 1) == 1:
            main_points = [neighbor / 2, neighbor / -2]
        elif np.linalg.norm(neighbor, 1) == 2:
            nonzero_indices = [i for i, v in enumerate(neighbor) if v != 0 and i < dim]
            main_points = [neighbor / 2, neighbor / -2, flipped(neighbor / 2, nonzero_indices[0]),
                           flipped(neighbor / -2, nonzero_indices[0])]
        else:
            main_points = [sp.Matrix(np.multiply(neighbor, sp.Matrix(c) / 2))
                           for c in itertools.product([-1, 1], repeat=3)]
        points += main_points
        # every direction in which the neighbor does not move contributes one extra layer on each side
        zero_indices = [i for i, v in enumerate(neighbor) if v == 0 and i < dim]
        for i in zero_indices:
            points += [point + sp.Matrix(unitvec(i)) for point in main_points]
            points += [point - sp.Matrix(unitvec(i)) for point in main_points]
        points_tuple = tuple([tuple(p) for p in points])
        self._stencil = points_tuple

        # determine the stencil weights
        if len(derivative) == 0:
            weights = None
        else:
            derivation = FiniteDifferenceStencilDerivation(derivative, points_tuple).get_stencil()
            if not derivation.accuracy:
                raise Exception('the requested derivative cannot be performed with the available neighbors')
            weights = derivation.weights

            # if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
            free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
            if free_weights_prefix is not None:
                weights = [w.subs({fw: sp.Symbol(f"{free_weights_prefix}_{i}") for i, fw in enumerate(free_weights)})
                           for w in weights]
            elif len(free_weights) > 0:
                # try every combination of candidate values and group the results by number of zero weights
                zero_counts = defaultdict(list)
                for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
                                                repeat=len(free_weights)):
                    subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
                    weights = [w.subs(subs) for w in derivation.weights]
                    if not all(a == 0 for a in weights):
                        zero_count = sum([1 for w in weights if w == 0])
                        zero_counts[zero_count].append(weights)
                best = zero_counts[max(zero_counts.keys())]
                if len(best) > 1:  # if there are multiple, pick the one that contains a nonzero center weight
                    center = [tuple(p + pos) for p in points].index((0, 0, 0)[:dim])
                    best = [b for b in best if b[center] != 0]
                if len(best) > 1:  # if there are still multiple, they are equivalent, so we average
                    weights = [sum([b[i] for b in best]) / len(best) for i in range(len(weights))]
                else:
                    weights = best[0]
                assert weights

        # shift by `pos`, the same conversion used above to locate the (0, ..., 0) center point
        points_tuple = tuple([tuple(p + pos) for p in points])
        self._points = points_tuple
        self._weights = weights

    @property
    def points(self):
        """return the points of the stencil"""
        return self._points

    @property
    def stencil(self):
        """return the points of the stencil relative to the staggered position specified by neighbor"""
        return self._stencil

    @property
    def weights(self):
        """return the weights of the stencil"""
        assert self._weights is not None
        return self._weights

    def visualize(self):
        """Plot the stencil points, with their weights as data when weights exist.

        Points with a zero weight are left out. Without weights (empty
        `derivative`) all points are plotted and no data is passed.
        """
        if self._weights is None:
            # Going through the `weights` property here would trip its not-None assertion.
            ws = None
            pts = np.array(self.points, dtype=int)
        else:
            ws = np.array([w for w in self.weights if w != 0], dtype=float)
            pts = np.array([p for i, p in enumerate(self.points) if self.weights[i] != 0], dtype=int)
        from pystencils.stencil import plot
        plot(pts, data=ws)

    def apply(self, access: Field.Access):
        """Return the weighted sum of `access` shifted to every stencil point."""
        return sum([access.get_shifted(*point) * weight for point, weight in zip(self.points, self.weights)])
...@@ -109,7 +109,17 @@ class Diff(sp.Expr): ...@@ -109,7 +109,17 @@ class Diff(sp.Expr):
return result return result
def __str__(self):
    """Render the derivative as ``D(<arg>)``."""
    return "D({})".format(self.arg)
def interpolated_access(self, offset, **kwargs):
    """Represents an interpolated access on a spatially differentiated field

    Args:
        offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative
        **kwargs: forwarded unchanged to the field's own ``interpolated_access``
    """
    from pystencils.interpolation_astnodes import DiffInterpolatorAccess
    assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses"
    field_access = self.arg.field.interpolated_access(offset, **kwargs)
    return DiffInterpolatorAccess(field_access.symbol, self.target, *offset)
class DiffOperator(sp.Expr): class DiffOperator(sp.Expr):
...@@ -218,7 +228,9 @@ def diff_terms(expr): ...@@ -218,7 +228,9 @@ def diff_terms(expr):
Example: Example:
>>> x, y = sp.symbols("x, y") >>> x, y = sp.symbols("x, y")
>>> diff_terms( diff(x, 0, 0) ) >>> diff_terms( diff(x, 0, 0) )
{Diff(Diff(x, 0, -1), 0, -1)}
>>> diff_terms( diff(x, 0, 0) + y )
{Diff(Diff(x, 0, -1), 0, -1)} {Diff(Diff(x, 0, -1), 0, -1)}
""" """
result = set() result = set()
...@@ -306,7 +318,8 @@ def expand_diff_full(expr, functions=None, constants=None): ...@@ -306,7 +318,8 @@ def expand_diff_full(expr, functions=None, constants=None):
functions.difference_update(constants) functions.difference_update(constants)
def visit(e): def visit(e):
e = e.expand() if not isinstance(e, sp.Tuple):
e = e.expand()
if e.func == Diff: if e.func == Diff:
result = 0 result = 0
...@@ -331,6 +344,9 @@ def expand_diff_full(expr, functions=None, constants=None): ...@@ -331,6 +344,9 @@ def expand_diff_full(expr, functions=None, constants=None):
return result return result
elif isinstance(e, sp.Piecewise): elif isinstance(e, sp.Piecewise):
return sp.Piecewise(*((expand_diff_full(a, functions, constants), b) for a, b in e.args)) return sp.Piecewise(*((expand_diff_full(a, functions, constants), b) for a, b in e.args))
elif isinstance(expr, sp.Tuple):
new_args = [visit(arg) for arg in e.args]
return sp.Tuple(*new_args)
else: else:
new_args = [visit(arg) for arg in e.args] new_args = [visit(arg) for arg in e.args]
return e.func(*new_args) if new_args else e return e.func(*new_args) if new_args else e
...@@ -370,6 +386,9 @@ def expand_diff_linear(expr, functions=None, constants=None): ...@@ -370,6 +386,9 @@ def expand_diff_linear(expr, functions=None, constants=None):
return diff.split_linear(functions) return diff.split_linear(functions)
elif isinstance(expr, sp.Piecewise): elif isinstance(expr, sp.Piecewise):
return sp.Piecewise(*((expand_diff_linear(a, functions, constants), b) for a, b in expr.args)) return sp.Piecewise(*((expand_diff_linear(a, functions, constants), b) for a, b in expr.args))
elif isinstance(expr, sp.Tuple):
new_args = [expand_diff_linear(e, functions) for e in expr.args]
return sp.Tuple(*new_args)
else: else:
new_args = [expand_diff_linear(e, functions) for e in expr.args] new_args = [expand_diff_linear(e, functions) for e in expr.args]
result = sp.expand(expr.func(*new_args) if new_args else expr) result = sp.expand(expr.func(*new_args) if new_args else expr)
......
...@@ -21,10 +21,13 @@ def diffusion(scalar, diffusion_coeff, idx=None): ...@@ -21,10 +21,13 @@ def diffusion(scalar, diffusion_coeff, idx=None):
Examples: Examples:
>>> f = Field.create_generic('f', spatial_dimensions=2) >>> f = Field.create_generic('f', spatial_dimensions=2)
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=sp.Symbol("d")) >>> d = sp.Symbol("d")
>>> dx = sp.Symbol("dx")
>>> diffusion_term = diffusion(scalar=f, diffusion_coeff=d)
>>> discretization = Discretization2ndOrder() >>> discretization = Discretization2ndOrder()
>>> discretization(diffusion_term) >>> expected_output = ((f[-1, 0] + f[0, -1] - 4 * f[0, 0] + f[0, 1] + f[1, 0]) * d) / dx**2
(f_W*d + f_S*d - 4*f_C*d + f_N*d + f_E*d)/dx**2 >>> sp.simplify(discretization(diffusion_term) - expected_output)
0
""" """
if isinstance(scalar, Field): if isinstance(scalar, Field):
first_arg = scalar.center first_arg = scalar.center
...@@ -76,13 +79,6 @@ class Discretization2ndOrder: ...@@ -76,13 +79,6 @@ class Discretization2ndOrder:
self.dt = dt self.dt = dt
self.spatial_stencil = discretization_stencil_func self.spatial_stencil = discretization_stencil_func
@staticmethod
def _diff_order(e):
if not isinstance(e, Diff):
return 0
else:
return 1 + Discretization2ndOrder._diff_order(e.args[0])
def _discretize_diffusion(self, e): def _discretize_diffusion(self, e):
result = 0 result = 0
for c in range(e.dim): for c in range(e.dim):
...@@ -109,6 +105,7 @@ class Discretization2ndOrder: ...@@ -109,6 +105,7 @@ class Discretization2ndOrder:
return self._discretize_advection(e) return self._discretize_advection(e)
elif isinstance(e, Diff): elif isinstance(e, Diff):
arg, *indices = diff_args(e) arg, *indices = diff_args(e)
if not isinstance(arg, Field.Access): if not isinstance(arg, Field.Access):
raise ValueError("Only derivatives with field or field accesses as arguments can be discretized") raise ValueError("Only derivatives with field or field accesses as arguments can be discretized")
return self.spatial_stencil(indices, self.dx, arg) return self.spatial_stencil(indices, self.dx, arg)
...@@ -116,29 +113,6 @@ class Discretization2ndOrder: ...@@ -116,29 +113,6 @@ class Discretization2ndOrder:
new_args = [self._discretize_spatial(a) for a in e.args] new_args = [self._discretize_spatial(a) for a in e.args]
return e.func(*new_args) if new_args else e return e.func(*new_args) if new_args else e
def _discretize_diff(self, e):
order = self._diff_order(e)
if order == 1:
fa = e.args[0]
index = e.target
return (fa.neighbor(index, 1) - fa.neighbor(index, -1)) / (2 * self.dx)
elif order == 2:
indices = sorted([e.target, e.args[0].target])
fa = e.args[0].args[0]
if indices[0] == indices[1] and all(i >= 0 for i in indices):
result = (-2 * fa + fa.neighbor(indices[0], -1) + fa.neighbor(indices[0], +1))
elif indices[0] == indices[1]:
result = 0
for d in range(fa.field.spatial_dimensions):
result += (-2 * fa + fa.neighbor(d, -1) + fa.neighbor(d, +1))
else:
assert all(i >= 0 for i in indices)
offsets = [(1, 1), [-1, 1], [1, -1], [-1, -1]]
result = sum(o1 * o2 * fa.neighbor(indices[0], o1).neighbor(indices[1], o2) for o1, o2 in offsets) / 4
return result / (self.dx ** 2)
else:
raise NotImplementedError("Term contains derivatives of order > 2")
def __call__(self, expr): def __call__(self, expr):
if isinstance(expr, list): if isinstance(expr, list):
return [self(e) for e in expr] return [self(e) for e in expr]
...@@ -188,7 +162,7 @@ class Advection(sp.Function): ...@@ -188,7 +162,7 @@ class Advection(sp.Function):
return self.scalar.spatial_dimensions return self.scalar.spatial_dimensions
def _latex(self, printer): def _latex(self, printer):
name_suffix = "_%s" % self.scalar_index if self.scalar_index is not None else "" name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
if isinstance(self.vector, Field): if isinstance(self.vector, Field):
return r"\nabla \cdot(%s %s)" % (printer.doprint(sp.Symbol(self.vector.name)), return r"\nabla \cdot(%s %s)" % (printer.doprint(sp.Symbol(self.vector.name)),
printer.doprint(sp.Symbol(self.scalar.name + name_suffix))) printer.doprint(sp.Symbol(self.scalar.name + name_suffix)))
...@@ -235,7 +209,7 @@ class Diffusion(sp.Function): ...@@ -235,7 +209,7 @@ class Diffusion(sp.Function):
return self.scalar.spatial_dimensions return self.scalar.spatial_dimensions
def _latex(self, printer): def _latex(self, printer):
name_suffix = "_%s" % self.scalar_index if self.scalar_index is not None else "" name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
coeff = self.diffusion_coeff coeff = self.diffusion_coeff
diff_coeff = sp.Symbol(coeff.name) if isinstance(coeff, Field) else coeff diff_coeff = sp.Symbol(coeff.name) if isinstance(coeff, Field) else coeff
return r"div(%s \nabla %s)" % (printer.doprint(diff_coeff), return r"div(%s \nabla %s)" % (printer.doprint(diff_coeff),
...@@ -268,7 +242,7 @@ class Transient(sp.Function): ...@@ -268,7 +242,7 @@ class Transient(sp.Function):
return None if len(self.args) <= 1 else int(self.args[1]) return None if len(self.args) <= 1 else int(self.args[1])
def _latex(self, printer): def _latex(self, printer):
name_suffix = "_%s" % self.scalar_index if self.scalar_index is not None else "" name_suffix = f"_{self.scalar_index}" if self.scalar_index is not None else ""
return r"\partial_t %s" % (printer.doprint(sp.Symbol(self.scalar.name + name_suffix)),) return r"\partial_t %s" % (printer.doprint(sp.Symbol(self.scalar.name + name_suffix)),)
...@@ -311,8 +285,9 @@ def discretize_center(term, symbols_to_field_dict, dx, dim=3): ...@@ -311,8 +285,9 @@ def discretize_center(term, symbols_to_field_dict, dx, dim=3):
>>> term >>> term
x*x^Delta^0 x*x^Delta^0
>>> f = Field.create_generic('f', spatial_dimensions=3) >>> f = Field.create_generic('f', spatial_dimensions=3)
>>> discretize_center(term, { x: f }, dx=1, dim=3) >>> expected_output = f[0, 0, 0] * (-f[-1, 0, 0]/2 + f[1, 0, 0]/2)
f_C*(-f_W/2 + f_E/2) >>> sp.simplify(discretize_center(term, { x: f }, dx=1, dim=3) - expected_output)
0
""" """
substitutions = {} substitutions = {}
for symbols, field in symbols_to_field_dict.items(): for symbols, field in symbols_to_field_dict.items():
...@@ -362,7 +337,7 @@ def discretize_staggered(term, symbols_to_field_dict, coordinate, coordinate_off ...@@ -362,7 +337,7 @@ def discretize_staggered(term, symbols_to_field_dict, coordinate, coordinate_off
offset = [0] * dim offset = [0] * dim
offset[coordinate] = coordinate_offset offset[coordinate] = coordinate_offset
offset = np.array(offset, dtype=np.int) offset = np.array(offset, dtype=int)
gradient = grad(symbols)[coordinate] gradient = grad(symbols)[coordinate]
substitutions.update({s: (field[offset](i) + field(i)) / 2 for i, s in enumerate(symbols)}) substitutions.update({s: (field[offset](i) + field(i)) / 2 for i, s in enumerate(symbols)})
...@@ -394,8 +369,10 @@ def discretize_divergence(vector_term, symbols_to_field_dict, dx): ...@@ -394,8 +369,10 @@ def discretize_divergence(vector_term, symbols_to_field_dict, dx):
>>> x, dx = sp.symbols("x dx") >>> x, dx = sp.symbols("x dx")
>>> grad_x = grad(x, dim=3) >>> grad_x = grad(x, dim=3)
>>> f = Field.create_generic('f', spatial_dimensions=3) >>> f = Field.create_generic('f', spatial_dimensions=3)
>>> sp.simplify(discretize_divergence(grad_x, {x : f}, dx)) >>> expected_output = (f[-1, 0, 0] + f[0, -1, 0] + f[0, 0, -1] -
(f_W + f_S + f_B - 6*f_C + f_T + f_N + f_E)/dx**2 ... 6*f[0, 0, 0] + f[0, 0, 1] + f[0, 1, 0] + f[1, 0, 0])/dx**2
>>> sp.simplify(discretize_divergence(grad_x, {x : f}, dx) - expected_output)
0
""" """
dim = len(vector_term) dim = len(vector_term)
result = 0 result = 0
...@@ -408,7 +385,7 @@ def discretize_divergence(vector_term, symbols_to_field_dict, dx): ...@@ -408,7 +385,7 @@ def discretize_divergence(vector_term, symbols_to_field_dict, dx):
def __up_down_offsets(d, dim): def __up_down_offsets(d, dim):
coord = [0] * dim coord = [0] * dim
coord[d] = 1 coord[d] = 1
up = np.array(coord, dtype=np.int) up = np.array(coord, dtype=int)
coord[d] = -1 coord[d] = -1
down = np.array(coord, dtype=np.int) down = np.array(coord, dtype=int)
return up, down return up, down
import pystencils as ps
import sympy as sp
from pystencils.fd.derivation import FiniteDifferenceStaggeredStencilDerivation as FDS, \
FiniteDifferenceStencilDerivation as FD
import itertools
from collections import defaultdict
from collections.abc import Iterable
def get_access_and_direction(term):
    """Split a (possibly nested) derivative into the accessed field and its directions.

    Args:
        term: a derivative node whose ``args[0]`` is either a field access or
            another derivative of a field access, and whose ``args[1]`` is the
            direction of differentiation

    Returns:
        tuple ``(access, direction)`` where ``direction`` is a 1-tuple for a first
        derivative and a 2-tuple ``(outer, inner)`` for a second derivative

    Raises:
        ValueError: for derivatives of order three or higher, or nested derivatives
            of something that is not a field access
        NotImplementedError: if the differentiated object is neither a field access
            nor a derivative
    """
    outer_direction = term.args[1]
    operand = term.args[0]

    # first derivative: the operand is the field access itself
    if isinstance(operand, ps.Field.Access):
        return operand, (outer_direction,)

    if not isinstance(operand, ps.fd.Diff):
        raise NotImplementedError(f"can only deal with derivatives of field accesses, "
                                  f"but not {type(operand)}; expansion of derivatives probably failed")

    # nested derivative: only one additional level is supported
    innermost = operand.args[0]
    if isinstance(innermost, ps.fd.Diff):
        raise ValueError("can only handle first and second derivatives")
    if not isinstance(innermost, ps.Field.Access):
        raise ValueError("can only handle derivatives of field accesses")

    access, inner_direction = operand.args[:2]
    return access, (outer_direction, inner_direction)
class FVM1stOrder:
    """Finite-volume discretization

    Args:
        field: the field with the quantity to calculate, e.g. a concentration
        flux: a list of sympy expressions that specify the flux, one for each cartesian direction
        source: a list of sympy expressions that specify the source
    """
    def __init__(self, field: ps.field.Field, flux=0, source=0):
        def normalize(f, shape):
            """Convert ``f`` to an ``sp.Array``; extents equal to 1 are dropped from ``shape`` first."""
            shape = tuple(s for s in shape if s != 1)
            if not shape:
                shape = None

            if isinstance(f, sp.Array) or isinstance(f, Iterable) or isinstance(f, sp.Matrix):
                return sp.Array(f, shape)
            else:
                # a single expression is replicated once per entry of the (reduced) shape
                return sp.Array([f] * (sp.Mul(*shape) if shape else 1))

        self.c = field  # field holding the transported quantity
        self.dim = self.c.spatial_dimensions
        self.j = normalize(flux, (self.dim, ) + self.c.index_shape)  # flux, leading axis = spatial direction
        self.q = normalize(source, self.c.index_shape)  # source term, one entry per field component

    def discrete_flux(self, flux_field: ps.field.Field):
        """Return a list of assignments for the discrete fluxes

        Args:
            flux_field: a staggered field to which the fluxes should be assigned
        """

        assert ps.FieldType.is_staggered(flux_field)

        # running counter of the staggered derivations created so far; it is part of the
        # prefix of their free weight symbols ('fvm_free_<num>')
        num = 0

        def discretize(term, neighbor):
            """Recursively replace field accesses and derivatives in ``term`` by their
            discrete counterparts in direction ``neighbor``."""
            nonlocal num
            if isinstance(term, sp.Matrix):
                nw = term.applyfunc(lambda t: discretize(t, neighbor))
                return nw
            elif isinstance(term, ps.field.Field.Access):
                # arithmetic mean of the access and the access shifted by `neighbor`
                avg = (term.get_shifted(*neighbor) + term) * sp.Rational(1, 2)
                return avg
            elif isinstance(term, ps.fd.Diff):
                access, direction = get_access_and_direction(term)

                # free weights are only requested when the squared length of `neighbor` exceeds 2
                fds = FDS(neighbor, access.field.spatial_dimensions, direction,
                          free_weights_prefix=f'fvm_free_{num}' if sp.Matrix(neighbor).dot(neighbor) > 2 else None)
                num += 1
                return fds.apply(access)

            if term.args:
                new_args = [discretize(a, neighbor) for a in term.args]
                return term.func(*new_args)
            else:
                return term

        fluxes = self.j.applyfunc(ps.fd.derivative.expand_diff_full)
        # one entry per spatial direction; wrapped in a Matrix when the flux field has more than one index dimension
        fluxes = [sp.Matrix(fluxes.tolist()[i]) if flux_field.index_dimensions > 1 else fluxes.tolist()[i]
                  for i in range(self.dim)]

        # sum of the lengths of all staggered stencil directions divided by the dimension;
        # every discrete flux is divided by this value below
        A0 = sum([sp.Matrix(ps.stencil.direction_string_to_offset(d)).norm()
                  for d in flux_field.staggered_stencil]) / self.dim

        discrete_fluxes = []
        for neighbor in flux_field.staggered_stencil:
            neighbor = ps.stencil.direction_string_to_offset(neighbor)
            # project the flux onto the stencil direction
            directional_flux = fluxes[0] * int(neighbor[0])
            for i in range(1, self.dim):
                directional_flux += fluxes[i] * int(neighbor[i])
            discrete_flux = sp.simplify(discretize(directional_flux, neighbor))
            free_weights = [s for s in discrete_flux.atoms(sp.Symbol) if s.name.startswith('fvm_free_')]

            if len(free_weights) > 0:
                discrete_flux = discrete_flux.collect(discrete_flux.atoms(ps.field.Field.Access))
                # try every combination of -1/0/1 for the free weights and group the results
                # by the number of distinct field accesses they contain
                access_counts = defaultdict(list)
                for values in itertools.product([-1, 0, 1],
                                                repeat=len(free_weights)):
                    subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
                    simp = discrete_flux.subs(subs)
                    access_count = len(simp.atoms(ps.field.Field.Access))
                    access_counts[access_count].append(simp)
                # average over all candidates that need the fewest field accesses
                best_count = min(access_counts.keys())
                discrete_flux = sum(access_counts[best_count]) / len(access_counts[best_count])
            discrete_fluxes.append(discrete_flux / sp.Matrix(neighbor).norm())

        if flux_field.index_dimensions > 1:
            # vector-valued flux: one assignment per component, directions with a zero/empty flux are skipped
            return [ps.Assignment(lhs, rhs / A0)
                    for i, d in enumerate(flux_field.staggered_stencil) if discrete_fluxes[i]
                    for lhs, rhs in zip(flux_field.staggered_vector_access(d), sp.simplify(discrete_fluxes[i]))]
        else:
            return [ps.Assignment(flux_field.staggered_access(d), sp.simplify(discrete_fluxes[i]) / A0)
                    for i, d in enumerate(flux_field.staggered_stencil)]

    def discrete_source(self):
        """Return a list of assignments for the discrete source term"""

        def discretize(term):
            """Recursively replace derivatives in ``term`` by finite-difference stencils."""
            if isinstance(term, ps.fd.Diff):
                access, direction = get_access_and_direction(term)

                # full neighborhood (all non-empty combinations of the direction letters)
                if self.dim == 2:
                    stencil = ["".join(a).replace(" ", "") for a in itertools.product("NS ", "EW ")
                               if "".join(a).strip()]
                else:
                    stencil = ["".join(a).replace(" ", "") for a in itertools.product("NS ", "EW ", "TB ")
                               if "".join(a).strip()]
                weights = None
                # first try the axis-aligned neighbors only, then the full neighborhood built above;
                # note that the loop variable rebinds `stencil`, which is used again after the loop
                for stencil in [["N", "S", "E", "W", "T", "B"][:2 * self.dim], stencil]:
                    stencil = [tuple(ps.stencil.direction_string_to_offset(d, self.dim)) for d in stencil]

                    derivation = FD(direction, stencil).get_stencil()
                    if not derivation.accuracy:
                        continue
                    weights = derivation.weights

                    # if the weights are underdefined, we can choose the free symbols to find the sparsest stencil
                    free_weights = set(itertools.chain(*[w.free_symbols for w in weights]))
                    if len(free_weights) > 0:
                        # group all not-identically-zero weight sets by their number of zero weights
                        zero_counts = defaultdict(list)
                        for values in itertools.product([-1, -sp.Rational(1, 2), 0, 1, sp.Rational(1, 2)],
                                                        repeat=len(free_weights)):
                            subs = {free_weight: value for free_weight, value in zip(free_weights, values)}
                            weights = [w.subs(subs) for w in derivation.weights]
                            if not all(a == 0 for a in weights):
                                zero_count = sum([1 for w in weights if w == 0])
                                zero_counts[zero_count].append(weights)
                        best = zero_counts[max(zero_counts.keys())]
                        if len(best) > 1:
                            raise NotImplementedError("more than one suitable set of weights found, "
                                                      "don't know how to proceed")
                        weights = best[0]
                    break
                if not weights:
                    raise Exception('the requested derivative cannot be performed with the available neighbors')
                assert weights

                if access._field.index_dimensions == 0:
                    return sum([access._field.__getitem__(point) * weight for point, weight in zip(stencil, weights)])
                else:
                    # keep the index of the original access while shifting it to every stencil point
                    total = access.get_shifted(*stencil[0]).at_index(*access.index) * weights[0]
                    for point, weight in zip(stencil[1:], weights[1:]):
                        addl = access.get_shifted(*point).at_index(*access.index) * weight
                        total += addl
                    return total

            if term.args:
                new_args = [discretize(a) for a in term.args]
                return term.func(*new_args)
            else:
                return term

        source = self.q.applyfunc(ps.fd.derivative.expand_diff_full)
        source = source.applyfunc(discretize)
        # components whose source term is zero/empty produce no assignment
        return [ps.Assignment(lhs, rhs) for lhs, rhs in zip(self.c.center_vector, sp.flatten(source)) if rhs]

    def discrete_continuity(self, flux_field: ps.field.Field):
        """Return a list of assignments for the continuity equation, which includes the source term

        Args:
            flux_field: a staggered field from which the fluxes are taken
        """

        assert ps.FieldType.is_staggered(flux_field)

        # the stencil directions stored in the flux field plus their inverse directions
        neighbors = flux_field.staggered_stencil + [ps.stencil.inverse_direction_string(d)
                                                    for d in flux_field.staggered_stencil]
        # sum of the staggered accesses over all of these directions
        divergence = flux_field.staggered_vector_access(neighbors[0])
        for d in neighbors[1:]:
            divergence += flux_field.staggered_vector_access(d)

        source = self.discrete_source()
        source = {s.lhs: s.rhs for s in source}

        # new value = old value - divergence (+ source, where one was produced for this component)
        return [ps.Assignment(lhs, (lhs - rhs + source[lhs]) if lhs in source else (lhs - rhs))
                for lhs, rhs in zip(self.c.center_vector, divergence)]
def VOF(j: ps.field.Field, v: ps.field.Field, ρ: ps.field.Field):
    """Volume-of-fluid discretization of advection

    For every direction of the staggered stencil of ``j`` the flux is the sum of an
    outgoing contribution (taken from the center cell) and an incoming contribution
    (taken from the neighboring cell).

    Args:
        j: the staggered field to write the fluxes to. Should have a D2Q9/D3Q27 stencil. Other stencils work too, but
           incur a small error (D2Q5/D3Q7: v^2, D3Q19: v^3).
        v: the flow velocity field
        ρ: the quantity to advect

    Returns:
        list of assignments, one per stored value of ``j``
    """
    assert ps.FieldType.is_staggered(j)

    contributions = [[] for _ in range(j.index_shape[0])]
    center_velocity = v.center_vector

    for slot, direction in enumerate(j.staggered_stencil):
        c = ps.stencil.direction_string_to_offset(direction)
        neighbor_velocity = v.neighbor_vector(c)

        # --- outgoing part: material leaving the center cell towards `c`
        axes = range(len(center_velocity))
        leaves_cell = sp.And(*[sp.Or(c[a] * center_velocity[a] > 0, c[a] == 0) for a in axes])
        out_factors = [(1 - sp.Abs(center_velocity[a])) if c[a] == 0 else (c[a] * center_velocity[a])
                       for a in axes]
        out_overlap = sp.Mul(*out_factors)
        contributions[slot].append(ρ.center_vector * out_overlap * sp.Piecewise((1, leaves_cell), (0, True)))

        # --- incoming part: material entering from the neighbor at `c`
        axes = range(len(neighbor_velocity))
        enters_cell = sp.And(*[sp.Or(c[a] * neighbor_velocity[a] < 0, c[a] == 0) for a in axes])
        in_factors = [(1 - sp.Abs(neighbor_velocity[a])) if c[a] == 0 else neighbor_velocity[a]
                      for a in axes]
        in_overlap = sp.Mul(*in_factors)
        sign = (c == 1).sum() % 2 * 2 - 1
        contributions[slot].append(sign * ρ.neighbor_vector(c) * in_overlap
                                   * sp.Piecewise((1, enters_cell), (0, True)))

    # collapse the per-direction contribution lists into a single expression each
    fluxes = []
    for parts in contributions:
        total = parts[0]
        for part in parts[1:]:
            total += part
        fluxes.append(total)

    return [ps.Assignment(lhs, rhs)
            for slot, direction in enumerate(j.staggered_stencil)
            for lhs, rhs in zip(j.staggered_vector_access(direction).values(), fluxes[slot].values())]
from functools import lru_cache
from typing import Tuple from typing import Tuple
import sympy as sp import sympy as sp
from pystencils.astnodes import LoopOverCoordinate from pystencils.astnodes import LoopOverCoordinate
from pystencils.cache import memorycache
from pystencils.fd import Diff from pystencils.fd import Diff
from pystencils.field import Field from pystencils.field import Field
from pystencils.transformations import generic_visit from pystencils.transformations import generic_visit
...@@ -72,43 +72,12 @@ def fd_stencils_forth_order_isotropic(indices, dx, fa): ...@@ -72,43 +72,12 @@ def fd_stencils_forth_order_isotropic(indices, dx, fa):
return stencils[dim].apply(fa) / dx return stencils[dim].apply(fa) / dx
def fd_stencils_isotropic_high_density_code(indices, dx, fa):
    """Finite-difference stencils for first and second derivatives in 1D and 2D.

    In 1D, and for mixed second derivatives in 2D, the call is forwarded to
    ``fd_stencils_standard``. For the remaining 2D cases a stencil that also
    involves the diagonal neighbors is built here.

    Args:
        indices: sequence of coordinate indices, one per derivative order
        dx: grid spacing
        fa: field access the derivative is applied to

    Raises:
        NotImplementedError: for more than two spatial dimensions or derivative
            orders other than 1 and 2 in 2D
    """
    dim = fa.field.spatial_dimensions
    if dim == 1:
        return fd_stencils_standard(indices, dx, fa)

    if dim == 2:
        order = len(indices)

        if order == 1:
            idx = indices[0]
            assert 0 <= idx < 2
            transverse = 1 if indices[0] == 0 else 0
            # weights applied along the transverse direction
            weights = {-1: sp.Rational(1, 12) / dx,
                       0: sp.Rational(1, 3) / dx,
                       1: sp.Rational(1, 12) / dx}
            forward = fa.neighbor(idx, +1)
            backward = fa.neighbor(idx, -1)
            upper_terms = sum(forward.neighbor(transverse, off) * w for off, w in weights.items())
            lower_terms = sum(backward.neighbor(transverse, off) * w for off, w in weights.items())
            return upper_terms - lower_terms

        if order == 2:
            if indices[0] != indices[1]:
                return fd_stencils_standard(indices, dx, fa)

            idx = indices[0]
            corner_sum = sum(fa.neighbor(0, i).neighbor(1, j) for i in (-1, 1) for j in (-1, 1))
            axis_sum = sum(fa.neighbor(idx, i) for i in (-1, 1))
            diagonals = sp.Rational(1, 8) * corner_sum
            div_direction = sp.Rational(1, 2) * axis_sum
            center = - sp.Rational(3, 2) * fa
            return (diagonals + div_direction + center) / (dx ** 2)

    raise NotImplementedError("Supports only derivatives up to order 2 for 1D and 2D setups")
def discretize_spatial(expr, dx, stencil=fd_stencils_standard): def discretize_spatial(expr, dx, stencil=fd_stencils_standard):
if isinstance(stencil, str): if isinstance(stencil, str):
if stencil == 'standard': if stencil == 'standard':
stencil = fd_stencils_standard stencil = fd_stencils_standard
elif stencil == 'isotropic': elif stencil == 'isotropic':
stencil = fd_stencils_isotropic stencil = fd_stencils_isotropic
elif stencil == 'isotropic_hd':
stencil = fd_stencils_isotropic_high_density_code
else: else:
raise ValueError("Unknown stencil. Supported 'standard' and 'isotropic'") raise ValueError("Unknown stencil. Supported 'standard' and 'isotropic'")
...@@ -167,9 +136,7 @@ def discretize_spatial_staggered(expr, dx, stencil=fd_stencils_standard): ...@@ -167,9 +136,7 @@ def discretize_spatial_staggered(expr, dx, stencil=fd_stencils_standard):
# -------------------------------------- special stencils -------------------------------------------------------------- # -------------------------------------- special stencils --------------------------------------------------------------
@lru_cache(maxsize=1)
@memorycache(maxsize=1)
def forth_order_2d_derivation() -> Tuple[FiniteDifferenceStencilDerivation.Result, ...]: def forth_order_2d_derivation() -> Tuple[FiniteDifferenceStencilDerivation.Result, ...]:
# Symmetry, isotropy and 4th order conditions are not enough to fully specify the stencil # Symmetry, isotropy and 4th order conditions are not enough to fully specify the stencil
# one weight has to be specifically set to a somewhat arbitrary value # one weight has to be specifically set to a somewhat arbitrary value
......
import functools
import hashlib import hashlib
import operator
import pickle import pickle
import re import re
from enum import Enum from enum import Enum
from itertools import chain from itertools import chain
from typing import List, Optional, Sequence, Set, Tuple from typing import List, Optional, Sequence, Set, Tuple, Union
import numpy as np import numpy as np
import sympy as sp import sympy as sp
from sympy.core.cache import cacheit from sympy.core.cache import cacheit
import pystencils
from pystencils.alignedarray import aligned_empty from pystencils.alignedarray import aligned_empty
from pystencils.data_types import StructType, TypedSymbol, create_type from pystencils.typing import StructType, TypedSymbol, BasicType, create_type
from pystencils.kernelparameters import FieldShapeSymbol, FieldStrideSymbol from pystencils.typing.typed_sympy import FieldShapeSymbol, FieldStrideSymbol
from pystencils.stencil import direction_string_to_offset, offset_to_direction_string from pystencils.stencil import (
direction_string_to_offset, inverse_direction, offset_to_direction_string)
from pystencils.sympyextensions import is_integer_sequence from pystencils.sympyextensions import is_integer_sequence
__all__ = ['Field', 'fields', 'FieldType', 'AbstractField'] __all__ = ['Field', 'fields', 'FieldType', 'Field']
def fields(description=None, index_dimensions=0, layout=None, **kwargs):
    """Creates pystencils fields from a string description and/or numpy arrays.

    Args:
        description: optional description string, e.g. ``"s, v(2): double[2D]"``
        index_dimensions: number of index dimensions, only used when no description is given
        layout: memory layout, only allowed together with a description (defaults to 'numpy')
        **kwargs: numpy arrays used as templates, keyed by field name

    Returns:
        ``None`` if no field was created, the field itself if exactly one was created,
        otherwise a list of fields

    Examples:
        Create a 2D scalar and vector field:
            >>> s, v = fields("s, v(2): double[2D]")
            >>> assert s.spatial_dimensions == 2 and s.index_dimensions == 0
            >>> assert (v.spatial_dimensions, v.index_dimensions, v.index_shape) == (2, 1, (2,))

        Create an integer field of shape (10, 20):
            >>> f = fields("f : int32[10, 20]")
            >>> f.has_fixed_shape, f.shape
            (True, (10, 20))

        Numpy arrays can be used as template for shape and data type of field:
            >>> arr_s, arr_v = np.zeros([20, 20]), np.zeros([20, 20, 2])
            >>> s, v = fields("s, v(2)", s=arr_s, v=arr_v)
            >>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
            >>> assert v.index_shape == (2,)

        Format string can be left out, field names are taken from keyword arguments.
            >>> fields(f1=arr_s, f2=arr_s)
            [f1, f2]

        The keyword names ``index_dimensions`` and ``layout`` have special meaning, don't use them for field names
            >>> f = fields(f=arr_v, index_dimensions=1)
            >>> assert f.index_dimensions == 1
            >>> f = fields("pdfs(19) : float32[3D]", layout='fzyx')
            >>> f.layout
            (2, 1, 0)
    """
    result = []

    if description:
        field_descriptions, dtype, shape = _parse_description(description)
        if layout is None:
            layout = 'numpy'

        for field_name, idx_shape in field_descriptions:
            num_index_dims = len(idx_shape)

            if field_name in kwargs:
                # a template array was passed: its trailing axes have to match the described index shape
                arr = kwargs[field_name]
                idx_shape_of_arr = arr.shape[-num_index_dims:] if num_index_dims else ()
                assert idx_shape_of_arr == idx_shape
                new_field = Field.create_from_numpy_array(field_name, arr, index_dimensions=num_index_dims)
            elif isinstance(shape, tuple):
                new_field = Field.create_fixed_size(field_name, shape + idx_shape, dtype=dtype,
                                                    index_dimensions=num_index_dims, layout=layout)
            elif isinstance(shape, int) or shape is None:
                # an integer is the number of spatial dimensions; without any shape 2D is assumed
                spatial_dims = 2 if shape is None else shape
                new_field = Field.create_generic(field_name, spatial_dimensions=spatial_dims, dtype=dtype,
                                                 index_shape=idx_shape, layout=layout)
            else:
                assert False
            result.append(new_field)
    else:
        assert layout is None, "Layout can not be specified when creating Field from numpy array"
        for field_name, arr in kwargs.items():
            result.append(Field.create_from_numpy_array(field_name, arr, index_dimensions=index_dimensions))

    if not result:
        return None
    return result[0] if len(result) == 1 else result
class FieldType(Enum): class FieldType(Enum):
...@@ -96,6 +33,10 @@ class FieldType(Enum): ...@@ -96,6 +33,10 @@ class FieldType(Enum):
# unsafe fields may be accessed in an absolute fashion - the index depends on the data # unsafe fields may be accessed in an absolute fashion - the index depends on the data
# and thus may lead to out-of-bounds accesses # and thus may lead to out-of-bounds accesses
CUSTOM = 3 CUSTOM = 3
# staggered field
STAGGERED = 4
# staggered field that reverses sign when accessed via opposite direction
STAGGERED_FLUX = 5
@staticmethod @staticmethod
def is_generic(field): def is_generic(field):
...@@ -117,14 +58,18 @@ class FieldType(Enum): ...@@ -117,14 +58,18 @@ class FieldType(Enum):
assert isinstance(field, Field) assert isinstance(field, Field)
return field.field_type == FieldType.CUSTOM return field.field_type == FieldType.CUSTOM
@staticmethod
def is_staggered(field):
    """Return True if ``field`` is of type STAGGERED or STAGGERED_FLUX."""
    assert isinstance(field, Field)
    staggered_kinds = (FieldType.STAGGERED, FieldType.STAGGERED_FLUX)
    return field.field_type in staggered_kinds
class AbstractField: @staticmethod
def is_staggered_flux(field):
class AbstractAccess: assert isinstance(field, Field)
pass return field.field_type == FieldType.STAGGERED_FLUX
class Field(AbstractField): class Field:
""" """
With fields one can formulate stencil-like update rules on structured grids. With fields one can formulate stencil-like update rules on structured grids.
This Field class knows about the dimension, memory layout (strides) and optionally about the size of an array. This Field class knows about the dimension, memory layout (strides) and optionally about the size of an array.
...@@ -156,6 +101,14 @@ class Field(AbstractField): ...@@ -156,6 +101,14 @@ class Field(AbstractField):
First specify the spatial offsets in [], then in case index_dimension>0 the indices in () First specify the spatial offsets in [], then in case index_dimension>0 the indices in ()
e.g. ``f[-1,0,0](7)`` e.g. ``f[-1,0,0](7)``
Staggered Fields:
Staggered fields are used to store a value on a second grid shifted by half a cell with respect to the usual
grid.
The first index dimension is used to specify the position on the staggered grid (e.g. 0 means half-way to the
eastern neighbor, 1 is half-way to the northern neighbor, etc.), while additional indices can be used to store
multiple values at each position.
Example using no index dimensions: Example using no index dimensions:
>>> a = np.zeros([10, 10]) >>> a = np.zeros([10, 10])
>>> f = Field.create_from_numpy_array("f", a, index_dimensions=0) >>> f = Field.create_from_numpy_array("f", a, index_dimensions=0)
...@@ -185,8 +138,9 @@ class Field(AbstractField): ...@@ -185,8 +138,9 @@ class Field(AbstractField):
index_shape: optional shape of the index dimensions i.e. maximum values allowed for each index dimension, index_shape: optional shape of the index dimensions i.e. maximum values allowed for each index dimension,
has to be a list or tuple has to be a list or tuple
field_type: besides the normal GENERIC fields, there are INDEXED fields that store indices of the domain field_type: besides the normal GENERIC fields, there are INDEXED fields that store indices of the domain
that should be iterated over, and BUFFER fields that are used to generate that should be iterated over, BUFFER fields that are used to generate communication
communication packing/unpacking kernels packing/unpacking kernels, and STAGGERED fields, which store values half-way to the next
cell
""" """
if index_shape is not None: if index_shape is not None:
assert index_dimensions == 0 or index_dimensions == len(index_shape) assert index_dimensions == 0 or index_dimensions == len(index_shape)
...@@ -208,11 +162,14 @@ class Field(AbstractField): ...@@ -208,11 +162,14 @@ class Field(AbstractField):
raise ValueError("Structured arrays/fields are not allowed to have an index dimension") raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,) shape += (1,)
strides += (1,) strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
return Field(field_name, field_type, dtype, layout, shape, strides) return Field(field_name, field_type, dtype, layout, shape, strides)
@staticmethod @staticmethod
def create_from_numpy_array(field_name: str, array: np.ndarray, index_dimensions: int = 0) -> 'Field': def create_from_numpy_array(field_name: str, array: np.ndarray, index_dimensions: int = 0,
field_type=FieldType.GENERIC) -> 'Field':
"""Creates a field based on the layout, data type, and shape of a given numpy array. """Creates a field based on the layout, data type, and shape of a given numpy array.
Kernels created for these kind of fields can only be called with arrays of the same layout, shape and type. Kernels created for these kind of fields can only be called with arrays of the same layout, shape and type.
...@@ -221,6 +178,7 @@ class Field(AbstractField): ...@@ -221,6 +178,7 @@ class Field(AbstractField):
field_name: symbolic name for the field field_name: symbolic name for the field
array: numpy array array: numpy array
index_dimensions: see documentation of Field index_dimensions: see documentation of Field
field_type: kind of field
""" """
spatial_dimensions = len(array.shape) - index_dimensions spatial_dimensions = len(array.shape) - index_dimensions
if spatial_dimensions < 1: if spatial_dimensions < 1:
...@@ -239,12 +197,15 @@ class Field(AbstractField): ...@@ -239,12 +197,15 @@ class Field(AbstractField):
raise ValueError("Structured arrays/fields are not allowed to have an index dimension") raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,) shape += (1,)
strides += (1,) strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
return Field(field_name, FieldType.GENERIC, array.dtype, spatial_layout, shape, strides) return Field(field_name, field_type, array.dtype, spatial_layout, shape, strides)
@staticmethod @staticmethod
def create_fixed_size(field_name: str, shape: Tuple[int, ...], index_dimensions: int = 0, def create_fixed_size(field_name: str, shape: Tuple[int, ...], index_dimensions: int = 0,
dtype=np.float64, layout: str = 'numpy', strides: Optional[Sequence[int]] = None) -> 'Field': dtype=np.float64, layout: str = 'numpy', strides: Optional[Sequence[int]] = None,
field_type=FieldType.GENERIC) -> 'Field':
""" """
Creates a field with fixed sizes i.e. can be called only with arrays of the same size and layout Creates a field with fixed sizes i.e. can be called only with arrays of the same size and layout
...@@ -255,6 +216,7 @@ class Field(AbstractField): ...@@ -255,6 +216,7 @@ class Field(AbstractField):
dtype: numpy data type of the array the kernel is called with later dtype: numpy data type of the array the kernel is called with later
layout: full layout of array, not only spatial dimensions layout: full layout of array, not only spatial dimensions
strides: strides in bytes or None to automatically compute them from shape (assuming no padding) strides: strides in bytes or None to automatically compute them from shape (assuming no padding)
field_type: kind of field
""" """
spatial_dimensions = len(shape) - index_dimensions spatial_dimensions = len(shape) - index_dimensions
assert spatial_dimensions >= 1 assert spatial_dimensions >= 1
...@@ -275,11 +237,13 @@ class Field(AbstractField): ...@@ -275,11 +237,13 @@ class Field(AbstractField):
raise ValueError("Structured arrays/fields are not allowed to have an index dimension") raise ValueError("Structured arrays/fields are not allowed to have an index dimension")
shape += (1,) shape += (1,)
strides += (1,) strides += (1,)
if field_type == FieldType.STAGGERED and index_dimensions == 0:
raise ValueError("A staggered field needs at least one index dimension")
spatial_layout = list(layout) spatial_layout = list(layout)
for i in range(spatial_dimensions, len(layout)): for i in range(spatial_dimensions, len(layout)):
spatial_layout.remove(i) spatial_layout.remove(i)
return Field(field_name, FieldType.GENERIC, dtype, tuple(spatial_layout), shape, strides) return Field(field_name, field_type, dtype, tuple(spatial_layout), shape, strides)
def __init__(self, field_name, field_type, dtype, layout, shape, strides): def __init__(self, field_name, field_type, dtype, layout, shape, strides):
"""Do not use directly. Use static create* methods""" """Do not use directly. Use static create* methods"""
...@@ -291,14 +255,17 @@ class Field(AbstractField): ...@@ -291,14 +255,17 @@ class Field(AbstractField):
self._layout = normalize_layout(layout) self._layout = normalize_layout(layout)
self.shape = shape self.shape = shape
self.strides = strides self.strides = strides
self.latex_name = None # type: Optional[str] self.latex_name: Optional[str] = None
self.coordinate_origin = sp.Matrix([0] * self.spatial_dimensions)
self.coordinate_transform = sp.eye(self.spatial_dimensions)
if field_type == FieldType.STAGGERED:
assert self.staggered_stencil
def new_field_with_different_name(self, new_name): def new_field_with_different_name(self, new_name):
if self.has_fixed_shape: if self.has_fixed_shape:
return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides) return Field(new_name, self.field_type, self._dtype, self._layout, self.shape, self.strides)
else: else:
return Field.create_generic(new_name, self.spatial_dimensions, self.dtype.numpy_dtype, return Field(new_name, self.field_type, self.dtype, self.layout, self.shape, self.strides)
self.index_dimensions, self._layout, self.index_shape, self.field_type)
@property @property
def spatial_dimensions(self) -> int: def spatial_dimensions(self) -> int:
...@@ -312,6 +279,9 @@ class Field(AbstractField): ...@@ -312,6 +279,9 @@ class Field(AbstractField):
def ndim(self) -> int: def ndim(self) -> int:
return len(self.shape) return len(self.shape)
def values_per_cell(self) -> int:
    """Return the product of all entries of ``index_shape`` (1 if it is empty)."""
    count = 1
    for extent in self.index_shape:
        count = count * extent
    return count
@property @property
def layout(self): def layout(self):
return self._layout return self._layout
...@@ -348,8 +318,24 @@ class Field(AbstractField): ...@@ -348,8 +318,24 @@ class Field(AbstractField):
def dtype(self): def dtype(self):
return self._dtype return self._dtype
@property
def itemsize(self):
    """Item size reported by the numpy dtype underlying this field's data type."""
    numpy_dtype = self.dtype.numpy_dtype
    return numpy_dtype.itemsize
def __repr__(self): def __repr__(self):
return self._field_name if any(isinstance(s, sp.Symbol) for s in self.spatial_shape):
spatial_shape_str = f'{self.spatial_dimensions}d'
else:
spatial_shape_str = ','.join(str(i) for i in self.spatial_shape)
index_shape_str = ','.join(str(i) for i in self.index_shape)
if self.index_shape:
return f'{self._field_name}({index_shape_str}): {self.dtype}[{spatial_shape_str}]'
else:
return f'{self._field_name}: {self.dtype}[{spatial_shape_str}]'
def __str__(self):
    """Return the value of the ``name`` attribute as the string form of this object."""
    return self.name
def neighbor(self, coord_id, offset): def neighbor(self, coord_id, offset):
offset_list = [0] * self.spatial_dimensions offset_list = [0] * self.spatial_dimensions
...@@ -364,19 +350,37 @@ class Field(AbstractField): ...@@ -364,19 +350,37 @@ class Field(AbstractField):
index_shape = self.index_shape index_shape = self.index_shape
if len(index_shape) == 0: if len(index_shape) == 0:
return sp.Matrix([self.center]) return sp.Matrix([self.center])
if len(index_shape) == 1: elif len(index_shape) == 1:
return sp.Matrix([self(i) for i in range(index_shape[0])]) return sp.Matrix([self(i) for i in range(index_shape[0])])
elif len(index_shape) == 2: elif len(index_shape) == 2:
def cb(*args): return sp.Matrix([[self(i, j) for j in range(index_shape[1])] for i in range(index_shape[0])])
r = self.__call__(*args) elif len(index_shape) == 3:
return r return sp.Array([[[self(i, j, k) for k in range(index_shape[2])]
return sp.Matrix(*index_shape, cb) for j in range(index_shape[1])] for i in range(index_shape[0])])
else:
raise NotImplementedError("center_vector is not implemented for more than 3 index dimensions")
@property @property
def center(self): def center(self):
center = tuple([0] * self.spatial_dimensions) center = tuple([0] * self.spatial_dimensions)
return Field.Access(self, center) return Field.Access(self, center)
def neighbor_vector(self, offset):
    """Return all values stored at the cell shifted by ``offset`` as a matrix.

    Works like ``neighbor``, but collects every index instead of a single value.
    A three-component offset is accepted for 2D fields as long as its last
    component is zero. Fields with up to two index dimensions are supported.
    """
    if self.spatial_dimensions == 2 and len(offset) == 3:
        assert offset[2] == 0
        offset = offset[:2]

    def shifted():
        # spatial access at the (possibly truncated) offset
        return self[offset]

    num_index_dims = self.index_dimensions
    if num_index_dims == 0:
        return sp.Matrix([shifted()])
    if num_index_dims == 1:
        return sp.Matrix([shifted()(i) for i in range(self.index_shape[0])])
    if num_index_dims == 2:
        rows = [[shifted()(i, k) for k in range(self.index_shape[1])]
                for i in range(self.index_shape[0])]
        return sp.Matrix(rows)
    raise NotImplementedError("neighbor_vector is not implemented for more than 2 index dimensions")
def __getitem__(self, offset): def __getitem__(self, offset):
if type(offset) is np.ndarray: if type(offset) is np.ndarray:
offset = tuple(offset) offset = tuple(offset)
...@@ -385,21 +389,115 @@ class Field(AbstractField): ...@@ -385,21 +389,115 @@ class Field(AbstractField):
if type(offset) is not tuple: if type(offset) is not tuple:
offset = (offset,) offset = (offset,)
if len(offset) != self.spatial_dimensions: if len(offset) != self.spatial_dimensions:
raise ValueError("Wrong number of spatial indices: " raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")
"Got %d, expected %d" % (len(offset), self.spatial_dimensions))
return Field.Access(self, offset) return Field.Access(self, offset)
def absolute_access(self, offset, index): def absolute_access(self, offset, index):
assert FieldType.is_custom(self) assert FieldType.is_custom(self)
return Field.Access(self, offset, index, is_absolute_access=True) return Field.Access(self, offset, index, is_absolute_access=True)
def staggered_access(self, offset, index=None):
    """If this field is a staggered field, it can be accessed using half-integer offsets.
    For example, an offset of ``(0, sp.Rational(1,2))`` or ``"E"`` corresponds to the staggered point to the east
    of the cell center, i.e. half-way to the eastern-next cell.
    If the field stores more than one value per staggered point (e.g. a vector or a tensor), the index (integer or
    tuple of integers) refers to which of these values to access.

    Raises:
        ValueError: if the offset has the wrong number of components, does not correspond to a direction of the
                    staggered stencil, or if the number of indices does not fit the field
    """
    assert FieldType.is_staggered(self)

    offset_orig = offset  # kept for the error message below
    if type(offset) is np.ndarray:
        offset = tuple(offset)
    if type(offset) is str:
        # direction strings give integer offsets, which are halved to obtain the staggered position
        offset = tuple(direction_string_to_offset(offset, self.spatial_dimensions))
        offset = tuple([o * sp.Rational(1, 2) for o in offset])
    if len(offset) != self.spatial_dimensions:
        raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")

    prefactor = 1
    neighbor_vec = [0] * len(offset)
    for i in range(self.spatial_dimensions):
        # components for which offset + 1/2 is an integer determine the stencil direction
        if (offset[i] + sp.Rational(1, 2)).is_Integer:
            neighbor_vec[i] = sp.sign(offset[i])
    neighbor = offset_to_direction_string(neighbor_vec)
    if neighbor not in self.staggered_stencil:
        # retry with the inverse direction; flux fields flip the sign of the returned access in this case
        neighbor_vec = inverse_direction(neighbor_vec)
        neighbor = offset_to_direction_string(neighbor_vec)
        if FieldType.is_staggered_flux(self):
            prefactor = -1
    if neighbor not in self.staggered_stencil:
        raise ValueError(f"{offset_orig} is not a valid neighbor for the {self.staggered_stencil_name} stencil")

    # subtract half of the stencil direction from the offset; the direction's position
    # in the stencil becomes the first index of the access
    offset = tuple(sp.Matrix(offset) - sp.Rational(1, 2) * sp.Matrix(neighbor_vec))
    idx = self.staggered_stencil.index(neighbor)

    if self.index_dimensions == 1:  # this field stores a scalar value at each staggered position
        if index is not None:
            raise ValueError("Cannot specify an index for a scalar staggered field")
        return prefactor * Field.Access(self, offset, (idx,))
    else:  # this field stores a vector or tensor at each staggered position
        if index is None:
            raise ValueError(f"Wrong number of indices: Got 0, expected {self.index_dimensions - 1}")
        if type(index) is np.ndarray:
            index = tuple(index)
        if type(index) is not tuple:
            index = (index,)
        if self.index_dimensions != len(index) + 1:
            raise ValueError(f"Wrong number of indices: Got {len(index)}, expected {self.index_dimensions - 1}")

        return prefactor * Field.Access(self, offset, (idx, *index))
def staggered_vector_access(self, offset):
    """Collect every value stored at one staggered position into a ``sp.Matrix``.

    Args:
        offset: staggered position, forwarded unchanged to `staggered_access`.

    Returns:
        A matrix built from one `staggered_access` (1 index dimension), from
        ``index_shape[1]`` accesses (2 index dimensions), or from an
        ``index_shape[1]`` x ``index_shape[2]`` nested list (3 index dimensions).

    Raises:
        NotImplementedError: for any other number of index dimensions.
    """
    assert FieldType.is_staggered(self)
    dims = self.index_dimensions
    if dims not in (1, 2, 3):
        raise NotImplementedError("staggered_vector_access is not implemented for more than 3 index dimensions")
    if dims == 1:
        return sp.Matrix([self.staggered_access(offset)])
    if dims == 2:
        entries = [self.staggered_access(offset, i) for i in range(self.index_shape[1])]
        return sp.Matrix(entries)
    rows = []
    for i in range(self.index_shape[1]):
        rows.append([self.staggered_access(offset, (i, k)) for k in range(self.index_shape[2])])
    return sp.Matrix(rows)
@property
def staggered_stencil(self):
    """Direction strings of the staggered positions stored in each cell.

    The stencil is selected by the spatial dimensionality and by
    ``index_shape[0]``, the number of staggered points per cell.

    Raises:
        ValueError: if no stencil with that many staggered points is known.
    """
    assert FieldType.is_staggered(self)
    # spatial dimensions -> number of staggered points -> directions
    known_stencils = {
        2: {
            2: ["W", "S"],  # D2Q5
            4: ["W", "S", "SW", "NW"]  # D2Q9
        },
        3: {
            3: ["W", "S", "B"],  # D3Q7
            7: ["W", "S", "B", "BSW", "TSW", "BNW", "TNW"],  # D3Q15
            9: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS"],  # D3Q19
            13: ["W", "S", "B", "SW", "NW", "BW", "TW", "BS", "TS", "BSW", "TSW", "BNW", "TNW"]  # D3Q27
        }
    }
    num_points = self.index_shape[0]
    candidates = known_stencils[self.spatial_dimensions]
    if num_points not in candidates:
        raise ValueError(f"No known stencil has {num_points} staggered points")
    return candidates[num_points]
@property
def staggered_stencil_name(self):
    """Name of the staggered stencil in ``D<d>Q<q>`` form, with ``q = 2 * index_shape[0] + 1``."""
    assert FieldType.is_staggered(self)
    dim = self.spatial_dimensions
    q = self.index_shape[0] * 2 + 1
    return f"D{dim}Q{q}"
def __call__(self, *args, **kwargs):
    """Index the field at the all-zero spatial offset.

    All arguments are forwarded to the call operator of the resulting `Field.Access`.
    """
    origin = (0,) * self.spatial_dimensions
    return Field.Access(self, origin)(*args, **kwargs)
def hashable_contents(self):
    """Return the tuple of attributes that identifies this field.

    The result is what `__hash__` hashes and what `__eq__` compares.
    """
    return (self._layout,
            self.shape,
            self.strides,
            self.field_type,
            self._field_name,
            self.latex_name,
            self._dtype)
def __hash__(self):
    """Hash the field through `hashable_contents`, the same tuple `__eq__` compares."""
    contents = self.hashable_contents()
    return hash(contents)
...@@ -409,8 +507,48 @@ class Field(AbstractField): ...@@ -409,8 +507,48 @@ class Field(AbstractField):
return False return False
return self.hashable_contents() == other.hashable_contents() return self.hashable_contents() == other.hashable_contents()
@property
def physical_coordinates(self):
    """Physical position of the current cell.

    ``coordinate_origin + x_vector(spatial_dimensions)`` is passed through
    ``coordinate_transform``, which is either called (if it has ``__call__``)
    or applied with the ``@`` operator.
    """
    transform = self.coordinate_transform
    position = self.coordinate_origin + pystencils.x_vector(self.spatial_dimensions)
    if hasattr(transform, '__call__'):
        return transform(position)
    return transform @ position
@property
def physical_coordinates_staggered(self):
    """Physical position derived from ``pystencils.x_staggered_vector``.

    ``coordinate_origin + x_staggered_vector(spatial_dimensions)`` is passed
    through ``coordinate_transform``. A transform exposing ``__call__`` is
    called, exactly as in `physical_coordinates`; anything else is applied
    with the ``@`` operator.
    """
    transform = self.coordinate_transform
    position = self.coordinate_origin + pystencils.x_staggered_vector(self.spatial_dimensions)
    if hasattr(transform, '__call__'):
        return transform(position)
    return transform @ position
def index_to_physical(self, index_coordinates: sp.Matrix, staggered=False):
    """Map index coordinates to physical coordinates.

    Args:
        index_coordinates: position in index space.
        staggered: if true, 0.5 is added to every component before transforming.

    Returns:
        ``coordinate_transform`` applied to ``coordinate_origin + index_coordinates``
        (called if the transform has ``__call__``, otherwise applied with ``@``).
    """
    if staggered:
        half_cell = sp.Matrix([0.5] * len(self.coordinate_origin))
        index_coordinates = half_cell + index_coordinates
    transform = self.coordinate_transform
    shifted = self.coordinate_origin + index_coordinates
    if hasattr(transform, '__call__'):
        return transform(shifted)
    return transform @ shifted
def physical_to_index(self, physical_coordinates: sp.Matrix, staggered=False):
    """Map physical coordinates back to index coordinates (inverse of `index_to_physical`).

    Args:
        physical_coordinates: position in physical space.
        staggered: if true, the result refers to staggered positions, i.e. it
            undoes the 0.5 shift that `index_to_physical` applies. This is
            honoured for every kind of transform.

    Returns:
        The index coordinates. When the transform is callable and has no
        ``inv`` attribute, the raw result of ``sp.solve`` is returned instead.

    Raises:
        AssertionError: if ``sp.solve`` finds no solution for a callable
            transform without ``inv``.
    """
    transform = self.coordinate_transform
    if hasattr(transform, '__call__'):
        if not hasattr(transform, 'inv'):
            # No inverse available: solve index_to_physical(idx) == physical_coordinates for idx.
            idx = sp.Matrix(sp.symbols(f'index_coordinates:{self.ndim}', real=True))
            forward = self.index_to_physical(idx, staggered)
            rtn = sp.solve(forward - physical_coordinates, idx)
            assert rtn, f'Could not find inverse of coordinate_transform: {forward}'
            return rtn
        rtn = transform.inv()(physical_coordinates) - self.coordinate_origin
    else:
        rtn = transform.inv() @ physical_coordinates - self.coordinate_origin
    if staggered:
        rtn = sp.Matrix([i - 0.5 for i in rtn])
    return rtn
def set_coordinate_origin_to_field_center(self):
    """Set ``coordinate_origin`` to minus half of ``spatial_shape`` in every dimension."""
    half_extents = [extent / 2 for extent in self.spatial_shape]
    self.coordinate_origin = -sp.Matrix(half_extents)
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences # noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class Access(TypedSymbol, AbstractField.AbstractAccess): class Access(TypedSymbol):
"""Class representing a relative access into a `Field`. """Class representing a relative access into a `Field`.
This class behaves like a normal sympy Symbol, it is actually derived from it. One can built up This class behaves like a normal sympy Symbol, it is actually derived from it. One can built up
...@@ -429,11 +567,13 @@ class Field(AbstractField): ...@@ -429,11 +567,13 @@ class Field(AbstractField):
>>> central_y_component.at_index(0) # change component >>> central_y_component.at_index(0) # change component
v_C^0 v_C^0
""" """
_iterable = False # see https://i10git.cs.fau.de/pycodegen/pystencils/-/merge_requests/166#note_10680
def __new__(cls, name, *args, **kwargs):
    """Create the access through the cached constructor ``__xnew_cached_``."""
    return Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False): def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None, is_absolute_access=False, dtype=None):
field_name = field.name field_name = field.name
offsets_and_index = (*offsets, *idx) if idx is not None else offsets offsets_and_index = (*offsets, *idx) if idx is not None else offsets
constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index]) constant_offsets = not any([isinstance(o, sp.Basic) and not o.is_Integer for o in offsets_and_index])
...@@ -458,11 +598,15 @@ class Field(AbstractField): ...@@ -458,11 +598,15 @@ class Field(AbstractField):
offset_name = hashlib.md5(pickle.dumps(offsets_and_index)).hexdigest()[:12] offset_name = hashlib.md5(pickle.dumps(offsets_and_index)).hexdigest()[:12]
superscript = None superscript = None
symbol_name = "%s_%s" % (field_name, offset_name) symbol_name = f"{field_name}_{offset_name}"
if superscript is not None: if superscript is not None:
symbol_name += "^" + superscript symbol_name += "^" + superscript
obj = super(Field.Access, self).__xnew__(self, symbol_name, field.dtype) if dtype:
obj = super(Field.Access, self).__xnew__(self, symbol_name, dtype)
else:
obj = super(Field.Access, self).__xnew__(self, symbol_name, field.dtype)
obj._field = field obj._field = field
obj._offsets = [] obj._offsets = []
for o in offsets: for o in offsets:
...@@ -470,7 +614,7 @@ class Field(AbstractField): ...@@ -470,7 +614,7 @@ class Field(AbstractField):
obj._offsets.append(o) obj._offsets.append(o)
else: else:
obj._offsets.append(int(o)) obj._offsets.append(int(o))
obj._offsets = tuple(obj._offsets) obj._offsets = tuple(sp.sympify(obj._offsets))
obj._offsetName = offset_name obj._offsetName = offset_name
obj._superscript = superscript obj._superscript = superscript
obj._index = idx obj._index = idx
...@@ -484,7 +628,10 @@ class Field(AbstractField): ...@@ -484,7 +628,10 @@ class Field(AbstractField):
return obj return obj
def __getnewargs__(self):
    """Positional arguments used to rebuild this access, including its dtype."""
    return self.field, self.offsets, self.index, self.is_absolute_access, self.dtype
def __getnewargs_ex__(self):
    """Return ``(args, kwargs)`` for rebuilding this access; no keyword arguments are used."""
    positional = (self.field, self.offsets, self.index, self.is_absolute_access, self.dtype)
    return positional, {}
# noinspection SpellCheckingInspection # noinspection SpellCheckingInspection
__xnew__ = staticmethod(__new_stage2__) __xnew__ = staticmethod(__new_stage2__)
...@@ -501,18 +648,18 @@ class Field(AbstractField): ...@@ -501,18 +648,18 @@ class Field(AbstractField):
idx = () idx = ()
if len(idx) != self.field.index_dimensions: if len(idx) != self.field.index_dimensions:
raise ValueError("Wrong number of indices: " raise ValueError(f"Wrong number of indices: Got {len(idx)}, expected {self.field.index_dimensions}")
"Got %d, expected %d" % (len(idx), self.field.index_dimensions)) if len(idx) == 1 and isinstance(idx[0], str):
return Field.Access(self.field, self._offsets, idx) dtype = BasicType(self.field.dtype.numpy_dtype[idx[0]])
return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=dtype)
else:
return Field.Access(self.field, self._offsets, idx,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def __getitem__(self, *idx):
    """Bracket syntax; forwards the received arguments unchanged to ``__call__``."""
    call = self.__call__
    return call(*idx)
def __iter__(self):
"""This is necessary to work with parts of sympy that test if an object is iterable (e.g. simplify).
The __getitem__ would make it iterable"""
raise TypeError("Field access is not iterable")
@property @property
def field(self) -> 'Field': def field(self) -> 'Field':
"""Field that the Access points to""" """Field that the Access points to"""
...@@ -562,7 +709,8 @@ class Field(AbstractField): ...@@ -562,7 +709,8 @@ class Field(AbstractField):
""" """
offset_list = list(self.offsets) offset_list = list(self.offsets)
offset_list[coord_id] += offset offset_list[coord_id] += offset
return Field.Access(self.field, tuple(offset_list), self.index) return Field.Access(self.field, tuple(offset_list), self.index,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def get_shifted(self, *shift) -> 'Field.Access': def get_shifted(self, *shift) -> 'Field.Access':
"""Returns a new Access with changed spatial coordinates """Returns a new Access with changed spatial coordinates
...@@ -572,7 +720,11 @@ class Field(AbstractField): ...@@ -572,7 +720,11 @@ class Field(AbstractField):
>>> f[0,0].get_shifted(1, 1) >>> f[0,0].get_shifted(1, 1)
f_NE f_NE
""" """
return Field.Access(self.field, tuple(a + b for a, b in zip(shift, self.offsets)), self.index) return Field.Access(self.field,
tuple(a + b for a, b in zip(shift, self.offsets)),
self.index,
is_absolute_access=self.is_absolute_access,
dtype=self.dtype)
def at_index(self, *idx_tuple) -> 'Field.Access': def at_index(self, *idx_tuple) -> 'Field.Access':
"""Returns new Access with changed index. """Returns new Access with changed index.
...@@ -582,7 +734,15 @@ class Field(AbstractField): ...@@ -582,7 +734,15 @@ class Field(AbstractField):
>>> f(0).at_index(8) >>> f(0).at_index(8)
f_C^8 f_C^8
""" """
return Field.Access(self.field, self.offsets, idx_tuple) return Field.Access(self.field, self.offsets, idx_tuple,
is_absolute_access=self.is_absolute_access, dtype=self.dtype)
def _eval_subs(self, old, new):
    """Apply the substitution ``old -> new`` to every offset and index entry.

    Returns a new `Field.Access` on the same field that keeps the
    absolute-access flag and dtype of this one.
    """
    def substitute(expr):
        return sp.sympify(expr).subs(old, new)

    new_offsets = tuple(substitute(o) for o in self.offsets)
    new_index = tuple(substitute(i) for i in self.index)
    return Field.Access(self.field, new_offsets, new_index,
                        is_absolute_access=self.is_absolute_access,
                        dtype=self.dtype)
@property @property
def is_absolute_access(self) -> bool: def is_absolute_access(self) -> bool:
...@@ -599,30 +759,125 @@ class Field(AbstractField): ...@@ -599,30 +759,125 @@ class Field(AbstractField):
def _hashable_content(self):
    """Content used for hashing and comparison of the access.

    It combines the parent class' content, the field's `hashable_contents`,
    the index and offset entries, and the absolute-access flag.
    """
    super_class_contents = super(Field.Access, self)._hashable_content()
    return (super_class_contents, self._field.hashable_contents(), *self._index,
            *self._offsets, self._is_absolute_access)
def _staggered_offset(self, offsets, index):
    """Shift ``offsets`` by half of the stencil direction selected by ``index``.

    Args:
        offsets: one offset per spatial dimension.
        index: position in the field's ``staggered_stencil``.

    Returns:
        A list with one entry per element of ``offsets``.
    """
    assert FieldType.is_staggered(self._field)
    direction = self._field.staggered_stencil[index]
    neighbor = direction_string_to_offset(direction, self._field.spatial_dimensions)
    shifted = []
    for i, o in enumerate(offsets):
        shifted.append(o + sp.Rational(int(neighbor[i]), 2))
    return shifted
def _latex(self, _):
    """LaTeX representation ``{name}_{offsets}^{index}`` of the access.

    For staggered fields the offsets are the half-shifted ones from
    `_staggered_offset`, and the first index entry (the staggered position)
    is left out of the superscript. Offsets of absolute accesses are wrapped
    in ``\\mathbf{...}``; relative offsets of multi-dimensional fields are
    parenthesised.
    """
    field = self._field
    n = field.latex_name if field.latex_name else field.name
    staggered = FieldType.is_staggered(field)
    # Compute the displayed offsets once instead of once per dimension.
    if staggered:
        shown_offsets = self._staggered_offset(self.offsets, self.index[0])
    else:
        shown_offsets = self.offsets
    offset_str = ",".join([sp.latex(o) for o in shown_offsets])

    if self.is_absolute_access:
        # Braces are needed so that \mathbf applies to the whole offset list.
        offset_str = f"\\mathbf{{{offset_str}}}"
    elif self.field.spatial_dimensions > 1:
        offset_str = f"({offset_str})"

    if staggered:
        if self.index and self.field.index_dimensions > 1:
            superscript = self.index[1:] if len(self.index) > 2 else self.index[1]
            return f"{{{n}}}_{{{offset_str}}}^{{{superscript}}}"
        return f"{{{n}}}_{{{offset_str}}}"
    if self.index and self.field.index_dimensions > 0:
        superscript = self.index if len(self.index) > 1 else self.index[0]
        return f"{{{n}}}_{{{offset_str}}}^{{{superscript}}}"
    return f"{{{n}}}_{{{offset_str}}}"
def __str__(self):
    """Plain-text representation ``name[offsets](index)`` of the access.

    For staggered fields the offsets are the half-shifted ones from
    `_staggered_offset`, and the first index entry (the staggered position)
    is left out. Absolute accesses get an ``[abs]`` prefix in front of the
    offsets.
    """
    field = self._field
    n = field.latex_name if field.latex_name else field.name
    staggered = FieldType.is_staggered(field)
    # Compute the displayed offsets once instead of once per dimension.
    if staggered:
        shown_offsets = self._staggered_offset(self.offsets, self.index[0])
    else:
        shown_offsets = self.offsets
    offset_str = ",".join([sp.latex(o) for o in shown_offsets])

    if self.is_absolute_access:
        offset_str = f"[abs]{offset_str}"

    if staggered:
        if self.index and self.field.index_dimensions > 1:
            return f"{n}[{offset_str}]({self.index[1:] if len(self.index) > 2 else self.index[1]})"
        return f"{n}[{offset_str}]"
    if self.index and self.field.index_dimensions > 0:
        return f"{n}[{offset_str}]({self.index if len(self.index) > 1 else self.index[0]})"
    return f"{n}[{offset_str}]"
def fields(description=None, index_dimensions=0, layout=None,
           field_type=FieldType.GENERIC, **kwargs) -> Union[Field, List[Field]]:
    """Creates pystencils fields from a string description.

    Examples:
        Create a 2D scalar and vector field:
            >>> s, v = fields("s, v(2): double[2D]")
            >>> assert s.spatial_dimensions == 2 and s.index_dimensions == 0
            >>> assert (v.spatial_dimensions, v.index_dimensions, v.index_shape) == (2, 1, (2,))

        Create an integer field of shape (10, 20):
            >>> f = fields("f : int32[10, 20]")
            >>> f.has_fixed_shape, f.shape
            (True, (10, 20))

        Numpy arrays can be used as template for shape and data type of field:
            >>> arr_s, arr_v = np.zeros([20, 20]), np.zeros([20, 20, 2])
            >>> s, v = fields("s, v(2)", s=arr_s, v=arr_v)
            >>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
            >>> assert v.index_shape == (2,)

        Format string can be left out, field names are taken from keyword arguments.
            >>> fields(f1=arr_s, f2=arr_s)
            [f1: double[20,20], f2: double[20,20]]

        The keyword names ``index_dimensions`` and ``layout`` have special meaning, don't use them for field names
            >>> f = fields(f=arr_v, index_dimensions=1)
            >>> assert f.index_dimensions == 1
            >>> f = fields("pdfs(19) : float32[3D]", layout='fzyx')
            >>> f.layout
            (2, 1, 0)

    Args:
        description: field description string; if empty or None, fields are
            created from the numpy arrays passed as keyword arguments.
        index_dimensions: number of index dimensions, only used when no description is given.
        layout: memory layout; must be None when no description is given.
        field_type: passed on to every created field.
        **kwargs: numpy arrays keyed by field name.

    Returns:
        A single field if exactly one was created, otherwise a list of fields.

    Raises:
        ValueError: if no field could be created.
    """
    result = []
    if description:
        field_descriptions, dtype, shape = _parse_description(description)
        layout = 'numpy' if layout is None else layout
        for field_name, idx_shape in field_descriptions:
            if field_name in kwargs:
                # A template array was supplied for this name: take shape and dtype from it.
                arr = kwargs[field_name]
                idx_shape_of_arr = () if not len(idx_shape) else arr.shape[-len(idx_shape):]
                assert idx_shape_of_arr == idx_shape
                f = Field.create_from_numpy_array(field_name, arr, index_dimensions=len(idx_shape),
                                                  field_type=field_type)
            elif isinstance(shape, tuple):
                f = Field.create_fixed_size(field_name, shape + idx_shape, dtype=dtype,
                                            index_dimensions=len(idx_shape), layout=layout, field_type=field_type)
            elif isinstance(shape, int):
                f = Field.create_generic(field_name, spatial_dimensions=shape, dtype=dtype,
                                         index_shape=idx_shape, layout=layout, field_type=field_type)
            elif shape is None:
                f = Field.create_generic(field_name, spatial_dimensions=2, dtype=dtype,
                                         index_shape=idx_shape, layout=layout, field_type=field_type)
            else:
                # Raised explicitly so that the check survives `python -O`; a stripped
                # `assert False` would let a stale `f` from the previous iteration through.
                raise AssertionError(f"Unexpected shape specification {shape!r}")
            result.append(f)
    else:
        assert layout is None, "Layout can not be specified when creating Field from numpy array"
        for field_name, arr in kwargs.items():
            result.append(Field.create_from_numpy_array(field_name, arr, index_dimensions=index_dimensions,
                                                        field_type=field_type))

    if len(result) == 0:
        raise ValueError("Could not parse field description")
    elif len(result) == 1:
        return result[0]
    else:
        return result
def get_layout_from_strides(strides: Sequence[int], index_dimension_ids: Optional[List[int]] = None): def get_layout_from_strides(strides: Sequence[int], index_dimension_ids: Optional[List[int]] = None):
...@@ -685,8 +940,6 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0 ...@@ -685,8 +940,6 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0
if not alignment: if not alignment:
res = np.empty(shape, order='c', **kwargs) res = np.empty(shape, order='c', **kwargs)
else: else:
if alignment is True:
alignment = 8 * 4
res = aligned_empty(shape, alignment, byte_offset=byte_offset, **kwargs) res = aligned_empty(shape, alignment, byte_offset=byte_offset, **kwargs)
for a, b in reversed(swaps): for a, b in reversed(swaps):
...@@ -695,24 +948,35 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0 ...@@ -695,24 +948,35 @@ def create_numpy_array_with_layout(shape, layout, alignment=False, byte_offset=0
def spatial_layout_string_to_tuple(layout_str: str, dim: int) -> Tuple[int, ...]:
    """Translate a layout descriptor into a tuple ordering the spatial coordinates.

    Args:
        layout_str: layout descriptor, matched case-insensitively. Accepted
            values are 'fzyx', 'zyxf', 'soa', 'aos' (at most 3 spatial
            dimensions), 'f', 'reverse_numpy', 'c' and 'numpy'.
        dim: number of spatial dimensions, must be positive.

    Returns:
        ``(0, ..., dim - 1)`` for 'c' / 'numpy', the reversed tuple for every
        other accepted descriptor.

    Raises:
        ValueError: if ``dim`` is not positive, too large for the descriptor,
            or the descriptor is unknown.
    """
    if dim <= 0:
        raise ValueError("Dimensionality must be positive")

    layout_str = layout_str.lower()

    if layout_str in ('fzyx', 'zyxf', 'soa', 'aos'):
        if dim > 3:
            raise ValueError(f"Invalid spatial dimensionality for layout descriptor {layout_str}: May be at most 3.")
        return tuple(reversed(range(dim)))

    if layout_str in ('f', 'reverse_numpy'):
        return tuple(reversed(range(dim)))
    elif layout_str in ('c', 'numpy'):
        return tuple(range(dim))
    raise ValueError("Unknown layout descriptor " + layout_str)
def layout_string_to_tuple(layout_str, dim): def layout_string_to_tuple(layout_str, dim):
if dim <= 0:
raise ValueError("Dimensionality must be positive")
layout_str = layout_str.lower() layout_str = layout_str.lower()
if layout_str == 'fzyx' or layout_str == 'soa': if layout_str == 'fzyx' or layout_str == 'soa':
assert dim <= 4 if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim))) return tuple(reversed(range(dim)))
elif layout_str == 'zyxf' or layout_str == 'aos': elif layout_str == 'zyxf' or layout_str == 'aos':
assert dim <= 4 if dim > 4:
raise ValueError(f"Invalid total dimensionality for layout descriptor {layout_str}: May be at most 4.")
return tuple(reversed(range(dim - 1))) + (dim - 1,) return tuple(reversed(range(dim - 1))) + (dim - 1,)
elif layout_str == 'f' or layout_str == 'reverse_numpy': elif layout_str == 'f' or layout_str == 'reverse_numpy':
return tuple(reversed(range(dim))) return tuple(reversed(range(dim)))
...@@ -775,16 +1039,17 @@ type_description_regex = re.compile(r""" ...@@ -775,16 +1039,17 @@ type_description_regex = re.compile(r"""
""", re.VERBOSE | re.IGNORECASE) """, re.VERBOSE | re.IGNORECASE)
def _parse_description(description): def _parse_part1(d):
def parse_part1(d): result = field_description_regex.match(d)
while result:
name, index_str = result.group(1), result.group(2)
index = tuple(int(e) for e in index_str.split(",")) if index_str else ()
yield name, index
d = d[result.end():]
result = field_description_regex.match(d) result = field_description_regex.match(d)
while result:
name, index_str = result.group(1), result.group(2)
index = tuple(int(e) for e in index_str.split(",")) if index_str else ()
yield name, index
d = d[result.end():]
result = field_description_regex.match(d)
def _parse_description(description):
def parse_part2(d): def parse_part2(d):
result = type_description_regex.match(d) result = type_description_regex.match(d)
if result: if result:
...@@ -808,7 +1073,7 @@ def _parse_description(description): ...@@ -808,7 +1073,7 @@ def _parse_description(description):
else: else:
field_description, field_info = description, 'float64[2D]' field_description, field_info = description, 'float64[2D]'
fields_info = [e for e in parse_part1(field_description)] fields_info = [e for e in _parse_part1(field_description)]
if not field_info: if not field_info:
raise ValueError("Could not parse field description") raise ValueError("Could not parse field description")
......
import sympy as sp
from pystencils.typing import PointerType
class DivFunc(sp.Function):
    """
    Explicit division node, since sympy itself represents divisions with ^-1.

    The node takes exactly two arguments and evaluates numerically to
    ``args[0] / args[1]``.

    NOTE: ``args[0]`` is exposed as `divisor` and ``args[1]`` as `dividend`,
    which is the reverse of the usual mathematical naming.
    """
    is_Atom = True
    is_real = True

    def __new__(cls, *args, **kwargs):
        if len(args) != 2:
            raise ValueError(f'{cls} takes only 2 arguments, instead {len(args)} received!')
        return sp.Function.__new__(cls, *args, **kwargs)

    def _eval_evalf(self, *args, **kwargs):
        first = self.divisor.evalf()
        second = self.dividend.evalf()
        return first / second

    @property
    def divisor(self):
        """First argument of the node."""
        return self.args[0]

    @property
    def dividend(self):
        """Second argument of the node."""
        return self.args[1]
class AddressOf(sp.Function):
    """
    The '&' operation of C: takes the address of an lvalue.

    `canonical`, `is_commutative` and `dtype` are all derived from the single argument.
    """
    is_Atom = True

    def __new__(cls, arg):
        return sp.Function.__new__(cls, arg)

    @property
    def canonical(self):
        """``canonical`` of the argument; NotImplementedError if it has none."""
        operand = self.args[0]
        if not hasattr(operand, 'canonical'):
            raise NotImplementedError()
        return operand.canonical

    @property
    def is_commutative(self):
        return self.args[0].is_commutative

    @property
    def dtype(self):
        """Restrict-qualified pointer to the argument's dtype; ValueError if it has no dtype."""
        operand = self.args[0]
        if not hasattr(operand, 'dtype'):
            raise ValueError(f'pystencils supports only non void pointers. Current address_of type: {self.args[0]}')
        return PointerType(operand.dtype, restrict=True)
from pystencils.gpucuda.cudajit import make_python_function from pystencils.gpu.gpu_array_handler import GPUArrayHandler, GPUNotAvailableHandler
from pystencils.gpucuda.kernelcreation import create_cuda_kernel, created_indexed_cuda_kernel from pystencils.gpu.gpujit import make_python_function
from pystencils.gpu.kernelcreation import create_cuda_kernel, created_indexed_cuda_kernel
from .indexing import AbstractIndexing, BlockIndexing, LineIndexing from .indexing import AbstractIndexing, BlockIndexing, LineIndexing
__all__ = ['create_cuda_kernel', 'created_indexed_cuda_kernel', 'make_python_function', __all__ = ['GPUArrayHandler', 'GPUNotAvailableHandler',
'create_cuda_kernel', 'created_indexed_cuda_kernel', 'make_python_function',
'AbstractIndexing', 'BlockIndexing', 'LineIndexing'] 'AbstractIndexing', 'BlockIndexing', 'LineIndexing']
try:
import cupy as cp
import cupyx as cpx
except ImportError:
cp = None
cpx = None
import numpy as np
class GPUArrayHandler:
    """Creates and transfers cupy arrays on one fixed CUDA device.

    Every allocation and copy runs inside ``cp.cuda.Device(device_number)``,
    so the current device of the calling code does not matter.
    """

    def __init__(self, device_number):
        self._device_number = device_number

    def zeros(self, shape, dtype=np.float64, order='C'):
        """Zero-initialised array on the handler's device."""
        with cp.cuda.Device(self._device_number):
            return cp.zeros(shape=shape, dtype=dtype, order=order)

    def ones(self, shape, dtype=np.float64, order='C'):
        """One-initialised array on the handler's device."""
        with cp.cuda.Device(self._device_number):
            return cp.ones(shape=shape, dtype=dtype, order=order)

    def empty(self, shape, dtype=np.float64, order='C'):
        """Uninitialised array on the handler's device."""
        with cp.cuda.Device(self._device_number):
            return cp.empty(shape=shape, dtype=dtype, order=order)

    def to_gpu(self, numpy_array):
        """Copy ``numpy_array`` to the handler's device.

        If the array is a view of another numpy array, the base array is
        copied and the axis swaps reported by `_get_index_swaps` are re-applied
        to the copy.
        """
        swaps = _get_index_swaps(numpy_array)
        with cp.cuda.Device(self._device_number):
            if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
                gpu_array = cp.asarray(numpy_array.base)
                for a, b in reversed(swaps):
                    gpu_array = gpu_array.swapaxes(a, b)
                return gpu_array
            # This copy must also happen inside the device context; otherwise it
            # would land on whatever device is current in the caller.
            return cp.asarray(numpy_array)

    def upload(self, array, numpy_array):
        """Copy the contents of ``numpy_array`` into the existing GPU ``array``."""
        assert self._device_number == array.device.id
        if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
            with cp.cuda.Device(self._device_number):
                array.base.set(numpy_array.base)
        else:
            with cp.cuda.Device(self._device_number):
                array.set(numpy_array)

    def download(self, array, numpy_array):
        """Copy the contents of the GPU ``array`` into the existing ``numpy_array``."""
        assert self._device_number == array.device.id
        if numpy_array.base is not None and isinstance(numpy_array.base, np.ndarray):
            with cp.cuda.Device(self._device_number):
                numpy_array.base[:] = array.base.get()
        else:
            with cp.cuda.Device(self._device_number):
                numpy_array[:] = array.get()

    def randn(self, shape, dtype=np.float64):
        """Array of ``cp.random.randn`` samples on the handler's device."""
        with cp.cuda.Device(self._device_number):
            return cp.random.randn(*shape, dtype=dtype)

    @staticmethod
    def pinned_numpy_array(layout, shape, dtype):
        """Allocate a pinned host array of the given shape, arranged per ``layout``.

        ``layout`` must be a permutation of ``range(len(shape))``. The array is
        allocated in C order with permuted extents and then returned as a view
        with the axes swapped back.
        """
        assert set(layout) == set(range(len(shape))), "Wrong layout descriptor"
        cur_layout = list(range(len(shape)))
        swaps = []
        # Record the pairwise swaps that turn the identity ordering into `layout`.
        for i in range(len(layout)):
            if cur_layout[i] != layout[i]:
                index_to_swap_with = cur_layout.index(layout[i])
                swaps.append((i, index_to_swap_with))
                cur_layout[i], cur_layout[index_to_swap_with] = cur_layout[index_to_swap_with], cur_layout[i]
        assert tuple(cur_layout) == tuple(layout)

        shape = list(shape)
        for a, b in swaps:
            shape[a], shape[b] = shape[b], shape[a]

        res = cpx.empty_pinned(tuple(shape), order='c', dtype=dtype)

        for a, b in reversed(swaps):
            res = res.swapaxes(a, b)
        return res

    from_numpy = to_gpu
class GPUNotAvailableHandler:
    """Placeholder handler on which every attribute access raises NotImplementedError."""

    def __getattribute__(self, name):
        message = "Unable to utilise cupy! Please make sure cupy works correctly in your setup!"
        raise NotImplementedError(message)
def _get_index_swaps(array):
    """List the axis swaps that distinguish ``array`` from its base array.

    Axes are matched by stride value. A pair ``(base_axis, view_axis)`` is
    recorded whenever a stride sits at different positions in the base and in
    the view, unless the mirrored pair was already recorded. Arrays that are
    not views of a numpy array yield an empty list.
    """
    base = array.base
    if base is None or not isinstance(base, np.ndarray):
        return []
    base_strides = base.strides
    view_strides = array.strides
    swaps = []
    for stride in base_strides:
        pos_in_base = base_strides.index(stride)
        pos_in_view = view_strides.index(stride)
        if pos_in_base == pos_in_view or (pos_in_view, pos_in_base) in swaps:
            continue
        swaps.append((pos_in_base, pos_in_view))
    return swaps